# ==============================================================================
# File: mapper_gui.py
# (diff --git a/mapper_gui.py b/mapper_gui.py, new file mode 100644,
#  index 0000000..ea41649)
#
# Frameless Tkinter popup that watches tmp/active_term.json and shows fuzzy
# matches from NV_MASTER.ods for the term stored in that file.
# ==============================================================================
# -*- coding: utf-8 -*-
import tkinter as tk
import json
import os
import threading
import time
import pandas as pd
import re
import spacy
from rapidfuzz import fuzz

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
TMP_DIR = os.path.join(BASE_DIR, "tmp")
ACTIVE_FILE = os.path.join(TMP_DIR, "active_term.json")
CACHE_FILE = os.path.join(BASE_DIR, "mapper_cache_2.3.json")
NV_MASTER_PATH = os.path.join(BASE_DIR, "NV_MASTER.ods")
STOPWORDS = {
    "mit", "ohne", "der", "die", "das", "ein", "eine", "und", "zu", "von",
    "im", "in", "auf", "an", "als", "bei", "für", "aus", "dem", "den", "des",
    "eines", "einer",
}
CONF_THRESHOLD = 0.75
MAX_SUGGESTIONS = 20


# -------------------------
# Logging
# -------------------------
def log(msg):
    """Print *msg* to standard output."""
    print(msg)


# -------------------------
# Load NV_MASTER
# -------------------------
def normalize_text(s):
    """Return a lower-cased, punctuation-free, whitespace-collapsed form of *s*.

    Falsy input (``None``, ``""``, ``0``) yields ``""``. The characters
    ``()[]"'\\;:?!,.`` are removed and every whitespace run becomes a single
    space.
    """
    if not s:
        return ""
    s = str(s).strip().lower()
    s = re.sub(r"[\(\)\[\]\"'\\;:\?!,\.]", "", s)
    s = re.sub(r"\s+", " ", s)
    # Removing punctuation can expose new leading/trailing blanks
    # (e.g. "tasse ." -> "tasse "), so strip once more at the end.
    return s.strip()


nlp = spacy.load("de_core_news_sm")

# Memo of normalized term -> lemmatized term; avoids re-running the spaCy
# pipeline for terms that were already seen.
lemma_cache = {}


def lemmatize_term(term):
    """Return the space-joined spaCy lemmas of the normalized *term* (cached)."""
    term_norm = normalize_text(term)
    if term_norm in lemma_cache:
        return lemma_cache[term_norm]
    doc = nlp(term_norm)
    lemma = " ".join([token.lemma_ for token in doc])
    lemma_cache[term_norm] = lemma
    return lemma


def build_norm_index(nv_path):
    """Read every sheet of the ODS file at *nv_path* and index its terms.

    Sheets named "master" (case-insensitive) are skipped. Per sheet, the ID
    column is the last column whose header contains "id" (fallback: first
    column) and the word column is the last column whose header contains
    "wort" or "vokabel" (fallback: last column). A row with an empty ID
    inherits the most recent non-empty ID above it.

    Returns:
        ``(norm_dict, lemma_index)`` - two dicts mapping the normalized word
        and the lemmatized word respectively to a list of entries of the
        form ``{"Name": ..., "ID": ..., "Sheet": ...}``.
    """
    norm_dict = {}
    lemma_index = {}
    sheets = pd.read_excel(nv_path, sheet_name=None, engine="odf")
    for sheet_name, df in sheets.items():
        if str(sheet_name).strip().lower() == "master":
            continue
        df = df.fillna("")
        cols = [str(c).strip().lower() for c in df.columns]
        id_col = None
        word_col = None
        for i, c in enumerate(cols):
            if "id" in c:
                id_col = df.columns[i]
            if "wort" in c or "vokabel" in c:
                word_col = df.columns[i]
        if word_col is None and len(df.columns) >= 1:
            word_col = df.columns[-1]
        if id_col is None and len(df.columns) >= 1:
            id_col = df.columns[0]
        current_parent_id = None
        for _, row in df.iterrows():
            id_val = str(row[id_col]).strip() if id_col in df.columns else ""
            word_val = str(row[word_col]).strip() if word_col in df.columns else ""
            if id_val:
                current_parent_id = id_val
            if not word_val:
                continue
            norm_name = normalize_text(word_val)
            lemma = lemmatize_term(word_val)
            entry = {
                "Name": word_val.strip(),
                "ID": current_parent_id or "",
                "Sheet": sheet_name,
            }
            norm_dict.setdefault(norm_name, []).append(entry)
            lemma_index.setdefault(lemma, []).append(entry)
    log(f"NV_MASTER geladen. Begriffe: {sum(len(v) for v in norm_dict.values())}")
    return norm_dict, lemma_index


# -------------------------
# Matching
# -------------------------
def fuzzy_score(a, b):
    """Return rapidfuzz's token_set_ratio of *a* and *b* scaled to 0.0-1.0."""
    return fuzz.token_set_ratio(a, b) / 100.0


def _collect_candidates(query, index, threshold):
    """Return ``(score, name, id)`` tuples for all *index* keys matching *query*.

    A key that starts with *query* gets a 0.1 bonus (capped at 1.0). An empty
    query matches nothing; without this guard ``startswith("")`` would hand
    the prefix bonus to every key.
    """
    found = []
    if not query:
        return found
    for key, entries in index.items():
        score = fuzzy_score(query, key)
        if key.startswith(query):
            score = min(score + 0.1, 1.0)
        if score >= threshold:
            found.extend((score, e["Name"], e["ID"]) for e in entries)
    return found


def get_suggestions(term, norm_dict, lemma_index, threshold=CONF_THRESHOLD):
    """Return up to ``MAX_SUGGESTIONS`` display strings matching *term*.

    The lemmatized term is scored against ``lemma_index`` and the normalized
    term against ``norm_dict`` (each query form is compared with the index
    that was built from the same form). Candidates are sorted by descending
    score and de-duplicated on ``(name, id)``.

    Returns:
        A list of strings formatted ``"Name (ID)"``, or just ``"Name"`` when
        the entry has no ID.
    """
    term_norm = normalize_text(term)
    term_lemma = lemmatize_term(term)

    candidates = _collect_candidates(term_lemma, lemma_index, threshold)
    candidates += _collect_candidates(term_norm, norm_dict, threshold)
    # list.sort is stable, so equal scores keep lemma matches first.
    candidates.sort(key=lambda t: t[0], reverse=True)

    seen = set()
    results = []
    for _score, name, id_ in candidates:
        key = (name, id_)
        if key in seen:
            continue
        seen.add(key)
        results.append(f"{name} ({id_})" if id_ else name)
        if len(results) >= MAX_SUGGESTIONS:
            break
    return results


# -------------------------
# GUI
# -------------------------
class SuggestionPopup(tk.Tk):
    """Borderless window that lists suggestions for the term in ACTIVE_FILE."""

    def __init__(self, norm_dict, lemma_index):
        """Build the popup, hide it, and start polling ACTIVE_FILE."""
        super().__init__()
        self.norm_dict = norm_dict
        self.lemma_index = lemma_index
        self.geometry("+1000+700")  # bottom right
        self.overrideredirect(True)
        self.configure(bg="white")
        self.label = tk.Label(self, text="", justify="left", bg="white", anchor="nw")
        self.label.pack(padx=5, pady=5)
        self.last_term = None
        # Start hidden: check_loop only changes visibility when the term
        # changes, so without this an empty box would stay on screen until
        # the first term arrives.
        self.withdraw()
        self.check_loop()

    def _read_active_term(self):
        """Return the term stored in ACTIVE_FILE, or None if there is none.

        A missing file means "no active term". A file that cannot be read or
        parsed (for example because it is being written at this moment)
        leaves the current state untouched by returning ``self.last_term``.
        """
        try:
            with open(ACTIVE_FILE, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return None
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return self.last_term
        if not isinstance(data, dict):
            return None
        return data.get("term")

    def check_loop(self):
        """Poll ACTIVE_FILE and refresh or hide the popup when the term changes."""
        try:
            term = self._read_active_term()
            if term != self.last_term:
                self.last_term = term
                if term:
                    suggestions = get_suggestions(term, self.norm_dict, self.lemma_index)
                    self.label.config(text="\n".join(suggestions))
                    self.deiconify()
                else:
                    self.withdraw()
        finally:
            # Always re-arm the timer so one failing update cannot end polling.
            self.after(300, self.check_loop)  # check every 300 ms


def main():
    """Load the NV_MASTER index and run the popup's Tk main loop."""
    norm_dict, lemma_index = build_norm_index(NV_MASTER_PATH)
    app = SuggestionPopup(norm_dict, lemma_index)
    app.mainloop()


if __name__ == "__main__":
    main()


# ==============================================================================
# File: mapper_macro_listener.py
# (diff --git a/mapper_macro_listener.py b/mapper_macro_listener.py,
#  new file mode 100644, index 0000000..7413497)
#
# LibreOffice macro that publishes the text of the active cell in the
# "Objektbeschreibung" column to tmp/active_term.json.
# ==============================================================================
# -*- coding: utf-8 -*-
import os
import json
import time
import threading

# NOTE(review): absolute, user-specific path; it must point at the same
# directory the GUI uses as its BASE_DIR for both sides to share ACTIVE_FILE.
BASE_DIR = "/home/jarnold/.config/libreoffice/4/user/Scripts/python/Vokabular_Abgleich_Makro"
TMP_DIR = os.path.join(BASE_DIR, "tmp")
ACTIVE_FILE = os.path.join(TMP_DIR, "active_term.json")


def write_active_term(term):
    """Publish *term* in ACTIVE_FILE, or delete the file when *term* is falsy.

    The JSON is written to a sibling temp file and moved into place with
    ``os.replace`` so a concurrent reader never sees a half-written file.
    """
    os.makedirs(TMP_DIR, exist_ok=True)
    if term:
        tmp_path = ACTIVE_FILE + ".tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump({"term": term}, f, ensure_ascii=False)
        os.replace(tmp_path, ACTIVE_FILE)
    else:
        try:
            os.remove(ACTIVE_FILE)
        except FileNotFoundError:
            pass


def _find_objekt_column(sheet, max_header_rows=5):
    """Return the column index of the "objektbeschreibung" header, or None.

    Only the first *max_header_rows* rows of the used area are searched; the
    comparison is case-insensitive and ignores surrounding whitespace.
    """
    cursor = sheet.createCursor()
    cursor.gotoStartOfUsedArea(False)
    cursor.gotoEndOfUsedArea(True)
    used = cursor.RangeAddress
    for r in range(min(max_header_rows, used.EndRow + 1)):
        for c in range(used.EndColumn + 1):
            val = str(sheet.getCellByPosition(c, r).String).strip().lower()
            if val == "objektbeschreibung":
                return c
    return None


def _get_active_cell(doc):
    """Return the currently selected single cell of *doc*, or None.

    The active cell is taken from the document's current selection; a
    selection that is not a single sheet cell (range, shape, ...) gives None.
    """
    selection = doc.getCurrentSelection()
    if selection is None:
        return None
    if not selection.supportsService("com.sun.star.sheet.SheetCell"):
        return None
    return selection


def poll_objektbeschreibung(doc, interval=0.5, max_consecutive_errors=20):
    """Poll the active cell and mirror "Objektbeschreibung" values to disk.

    Args:
        doc: the Calc document to watch.
        interval: seconds to sleep between polls.
        max_consecutive_errors: stop polling after this many failed polls in
            a row (for example because the document was closed); ``None``
            keeps polling forever.

    Returns immediately when no "objektbeschreibung" header is found in the
    first rows of the sheet that is active at call time.
    """
    sheet = doc.CurrentController.ActiveSheet

    objekt_col = _find_objekt_column(sheet)
    if objekt_col is None:
        return  # column not found

    last_row = None
    last_value = None
    errors = 0

    while True:
        try:
            cell = _get_active_cell(doc)
            if cell is not None and cell.CellAddress.Column == objekt_col:
                term = str(cell.String).strip()
                row = cell.CellAddress.Row
                if term != last_value or row != last_row:
                    write_active_term(term)
                    last_value = term
                    last_row = row
            elif last_value is not None:
                # Cursor left the column: retract the published term once.
                write_active_term(None)
                last_value = None
            errors = 0
        except Exception:
            # Best effort: a single failed poll is ignored, but a long run of
            # failures means the document is gone, so end the thread instead
            # of spinning forever.
            errors += 1
            if max_consecutive_errors is not None and errors >= max_consecutive_errors:
                try:
                    write_active_term(None)
                except OSError:
                    pass
                return
        time.sleep(interval)


def start_polling():
    """Macro entry point: start polling the current document in a daemon thread."""
    doc = XSCRIPTCONTEXT.getDocument()
    # Polling runs in a background thread
    threading.Thread(target=poll_objektbeschreibung, args=(doc,), daemon=True).start()


g_exportedScripts = (start_polling,)


# ==============================================================================
# File: nvWindow.py
# (diff --git a/nvWindow.py b/nvWindow.py, new file mode 100644,
#  index 0000000..aa630fa)
# ==============================================================================
# -*- coding: utf-8 -*-
"""
===============================================================================
nvWindow.py – Tkinter suggestion window for NV_MASTER matching (LibreOffice)
===============================================================================

Purpose:
    This script opens a Tkinter window that shows live suggestions for terms
    from the file NV_MASTER.ods. It is designed to communicate directly with
    LibreOffice Calc via UNO – where possible.

Robustness:
    The script detects automatically whether UNO is available and compatible:
      - if yes → direct integration with LibreOffice (bidirectional)
      - if no  → fallback to isolated mode, but still runnable
    All events, errors and system states are logged.

Start LibreOffice (recommended):
    soffice --calc --accept="socket,host=localhost,port=2002;urp;" --nologo --norestore &

Start the application:
    /usr/lib/libreoffice/program/python3 ~/projects/nvWindow/nvWindow.py
    (or, if necessary:) python3 ~/projects/nvWindow/nvWindow.py

Dependencies:
    pip install rapidfuzz odfpy

===============================================================================
"""

import os
import sys
import traceback
import tkinter as tk
from datetime import datetime
from rapidfuzz import process, fuzz
from odf.opendocument import load
from odf.table import Table, TableRow, TableCell
from odf.text import P


# ------------------------------------------------------------------------------
# 1. Directory setup and logging
# ------------------------------------------------------------------------------

BASE_DIR = os.path.expanduser("~/projects/nvWindow")
LOG_FILE = os.path.join(BASE_DIR, "nvWindow.log")
NV_MASTER_PATH = os.path.join(BASE_DIR, "NV_MASTER.ods")


def log(msg):
    """Append a timestamped *msg* to LOG_FILE; print it if that fails."""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    try:
        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(f"[{timestamp}] {msg}\n")
    except Exception:
        # Logging must never raise: as a last resort fall back to stdout.
        print(f"[{timestamp}] {msg}")


# ------------------------------------------------------------------------------
# 2. UNO initialisation (robust, with fallback)
# ------------------------------------------------------------------------------

uno_available = False
desktop = None

# Matches the --accept string given in the module docstring.
UNO_CONNECT_URL = (
    "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
)


def init_uno():
    """Try to obtain a LibreOffice Desktop and set ``uno_available``/``desktop``.

    Inside a LibreOffice macro session (``XSCRIPTCONTEXT`` present) the
    Desktop comes from the script context. Outside LibreOffice the ``uno``
    module is imported from the system locations and a connection to the
    office listening on ``UNO_CONNECT_URL`` is resolved; a Desktop created
    from the local bootstrap context alone would not belong to that running
    office. Every failure is logged and leaves ``uno_available`` False.
    """
    global uno_available, desktop

    # already running inside LibreOffice? (XSCRIPTCONTEXT present)
    if "XSCRIPTCONTEXT" in globals():
        try:
            ctx = XSCRIPTCONTEXT.getComponentContext()
            smgr = ctx.ServiceManager
            desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
            uno_available = True
            log("UNO erkannt: Skript läuft innerhalb von LibreOffice.")
        except Exception as e:
            log(f"UNO innerhalb LO-Session nicht initialisierbar: {e}")
            uno_available = False
        return

    # outside LibreOffice: try the system installation
    try:
        sys.path.append("/usr/lib/python3/dist-packages")
        sys.path.append("/usr/lib/libreoffice/program")
        os.environ["URE_BOOTSTRAP"] = (
            "vnd.sun.star.pathname:/usr/lib/libreoffice/program/fundamentalrc"
        )

        import uno
    except Exception as outer:
        log(f"UNO-Import fehlgeschlagen: {outer}\n{traceback.format_exc()}")
        uno_available = False
        return

    try:
        local_ctx = uno.getComponentContext()
        resolver = local_ctx.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", local_ctx
        )
        # Raises when no office is listening on the socket -> fallback below.
        ctx = resolver.resolve(UNO_CONNECT_URL)
        smgr = ctx.ServiceManager
        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
        uno_available = True
        log("UNO erfolgreich initialisiert – externe Verbindung aktiv.")
    except Exception as inner:
        log(f"UNO verfügbar, aber keine Desktop-Instanz: {inner}")
        uno_available = False


init_uno()


# ------------------------------------------------------------------------------
# 3.
# Load NV_MASTER
# ------------------------------------------------------------------------------

def load_nv_master(filepath):
    """Read the ODS file at *filepath* and return one string per non-empty row.

    For every table row the text of all paragraphs in all cells is collected
    and joined with single spaces. Paragraph text is extracted with
    ``odf.teletype.extractText`` so that paragraphs whose first child is an
    element (e.g. a formatted span) are read as well instead of raising.

    Returns:
        The list of row strings, or ``[]`` when the file is missing or cannot
        be parsed (the reason is logged).
    """
    # Local import: same odfpy package the file already depends on.
    from odf import teletype

    entries = []
    if not os.path.exists(filepath):
        log(f"NV_MASTER.ods nicht gefunden: {filepath}")
        return []

    try:
        doc = load(filepath)
        for table in doc.getElementsByType(Table):
            for row in table.getElementsByType(TableRow):
                cells = row.getElementsByType(TableCell)
                if not cells:
                    continue
                cell_text = []
                for c in cells:
                    for p in c.getElementsByType(P):
                        text = teletype.extractText(p).strip()
                        if text:
                            cell_text.append(text)
                if cell_text:
                    entries.append(" ".join(cell_text))
        log(f"NV_MASTER geladen – {len(entries)} Begriffe erkannt.")
        return entries
    except Exception as e:
        log(f"Fehler beim Lesen NV_MASTER: {e}\n{traceback.format_exc()}")
        return []


NV_MASTER_LIST = load_nv_master(NV_MASTER_PATH)

# Without any vocabulary there is nothing to suggest, so stop right away.
if not NV_MASTER_LIST:
    log("Abbruch: NV_MASTER.ods konnte nicht gelesen werden oder war leer.")
    sys.exit("NV_MASTER.ods fehlt oder enthält keine Daten.")


# ------------------------------------------------------------------------------
# 4. Tkinter GUI
# ------------------------------------------------------------------------------

root = tk.Tk()
root.title("Objektbeschreibung – Vorschläge")
root.attributes("-topmost", True)  # keep the window above other windows
root.geometry("420x260+1500+800")
root.configure(bg="#202020")

label = tk.Label(
    root,
    text="Live-Vorschläge für Objektbeschreibung:",
    fg="white",
    bg="#202020",
    font=("Arial", 10, "bold"),
)
label.pack(pady=(10, 5))

# Query input
entry = tk.Entry(root, width=52, font=("Arial", 11))
entry.pack(pady=(0, 10))

# Result list
listbox = tk.Listbox(root, width=52, height=9, font=("Arial", 10))
listbox.pack(pady=5)


# ------------------------------------------------------------------------------
# 5.
# Matching + UNO communication
# ------------------------------------------------------------------------------

def _write_query_to_active_cell(query):
    """Write *query* into the selected Calc cell of the current document.

    Does nothing when the current selection is not a single sheet cell.
    UNO errors are logged and never propagated.
    """
    try:
        doc = desktop.getCurrentComponent()
        if not doc:
            log("UNO: Kein aktives Dokument.")
            return
        # The current selection is a property of the document model,
        # not of the sheet.
        selection = doc.getCurrentSelection()
        if selection is not None and selection.supportsService(
            "com.sun.star.sheet.SheetCell"
        ):
            selection.String = query
    except Exception as e:
        log(f"UNO-Update-Fehler: {e}")


def update_suggestions(event=None):
    """Refresh the suggestion list from the entry text.

    Bound to key releases of the entry widget. Clears the listbox, then shows
    the 8 best ``token_set_ratio`` matches from ``NV_MASTER_LIST`` with their
    score. When UNO is available the query is also written into the selected
    Calc cell.

    Args:
        event: the Tk event (unused); optional so the function can also be
            called directly.
    """
    query = entry.get().strip()
    listbox.delete(0, tk.END)

    if not query:
        return

    try:
        matches = process.extract(
            query, NV_MASTER_LIST, scorer=fuzz.token_set_ratio, limit=8
        )
        for item, score, _ in matches:
            listbox.insert(tk.END, f"{item} ({score:.1f}%)")

        if uno_available and desktop:
            _write_query_to_active_cell(query)
        log(f"Suchanfrage '{query}' – {len(matches)} Vorschläge generiert.")
    except Exception as e:
        log(f"Fehler in update_suggestions(): {e}\n{traceback.format_exc()}")


# An empty event sequence is rejected by Tk; update after every key release.
entry.bind("<KeyRelease>", update_suggestions)


# ------------------------------------------------------------------------------
# 6. Runtime / shutdown
# ------------------------------------------------------------------------------

log("nvWindow gestartet. Tkinter-Fenster aktiv.")
print("nvWindow läuft – Details siehe nvWindow.log")

try:
    root.mainloop()
except KeyboardInterrupt:
    log("Manuell abgebrochen.")
except Exception as e:
    log(f"Fehler in mainloop(): {e}\n{traceback.format_exc()}")
finally:
    log("nvWindow beendet.")