# -*- coding: utf-8 -*- import os import uno import unohelper import re import json import pandas as pd from pathlib import Path from difflib import SequenceMatcher # RapidFuzz für Fuzzy-Suche try: from rapidfuzz import fuzz RAPIDFUZZ_AVAILABLE = True except ImportError: RAPIDFUZZ_AVAILABLE = False # Spacy Lemmatizer try: import spacy nlp = spacy.load("de_core_news_sm") SPACY_AVAILABLE = True except: SPACY_AVAILABLE = False nlp = None # ========================= # Pfade & Config # ========================= SCRIPT_DIR = Path("/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro") NV_MASTER_FILE = SCRIPT_DIR / "NV_MASTER.ods" CACHE_FILE = SCRIPT_DIR / "mapper_cache.json" LOG_FILE = SCRIPT_DIR / "mapper_log.txt" STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"} # ========================= # Cache & Logging # ========================= if CACHE_FILE.exists(): with open(CACHE_FILE,"r",encoding="utf-8") as f: CACHE = json.load(f) else: CACHE = {} def save_cache(): with open(CACHE_FILE,"w",encoding="utf-8") as f: json.dump(CACHE, f, indent=2, ensure_ascii=False) def log(msg): with open(LOG_FILE,"a",encoding="utf-8") as f: f.write(msg + "\n") # ========================= # Textverarbeitung # ========================= def normalize_text(s): if not s: return "" s = str(s).lower().strip() s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s) s = re.sub(r"\s+"," ",s) return s lemma_cache = {} def lemmatize_term(term): term_norm = normalize_text(term) if term_norm in lemma_cache: return lemma_cache[term_norm] if SPACY_AVAILABLE and nlp: doc = nlp(term_norm) lemma = " ".join([token.lemma_ for token in doc]) else: lemma = term_norm lemma_cache[term_norm] = lemma return lemma def compound_split(term): parts = re.findall(r'[A-ZÄÖÜa-zäöü]+', term) return parts if parts else [term] # ========================= # NV_MASTER laden # ========================= def load_normvokabular(file_path): sheets = pd.read_excel(file_path, sheet_name=None, engine="odf") norm_dict = {} for sheet_name, df in sheets.items(): df = df.dropna(how="all", axis=1) df.columns = [str(c).strip() for c in df.columns] if "ID" not in df.columns or "Wort/Vokabel" not in df.columns: continue current_parent_id = None for _, row in df.iterrows(): row_id = str(row["ID"]).strip() if pd.notna(row["ID"]) else None row_word = str(row["Wort/Vokabel"]).strip() if pd.notna(row["Wort/Vokabel"]) else None if row_id: current_parent_id = row_id if not row_word: continue norm_dict[normalize_text(row_word)] = { "ID": current_parent_id, "Wort/Vokabel": row_word } return norm_dict # ========================= # Mapping # ========================= def map_term_with_indexes(term, norm_dict): term_norm = normalize_text(term) term_lemma = lemmatize_term(term_norm) # Cache prüfen if term_lemma in CACHE: cached = CACHE[term_lemma] if isinstance(cached, dict) and all(k in cached for k in ("hits","suggestions","ids")): return cached["hits"], cached["suggestions"], cached["ids"] else: CACHE.pop(term_lemma, None) hits = [] suggestions = [] ids = [] # Exakte Treffer if term_norm in norm_dict: e = norm_dict[term_norm] hits.append(e["Wort/Vokabel"]) ids.append(e["ID"]) elif term_lemma in norm_dict: e = norm_dict[term_lemma] hits.append(e["Wort/Vokabel"]) ids.append(e["ID"]) else: # Fuzzy Matching for key, e in norm_dict.items(): score = fuzz.token_sort_ratio(term_lemma, key)/100.0 if RAPIDFUZZ_AVAILABLE else SequenceMatcher(None, term_lemma, key).ratio() if score >= 0.75: suggestions.append(e["Wort/Vokabel"]) ids.append(e["ID"]) CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids} return hits, suggestions, ids # ========================= # LibreOffice Dialog (ListBox + Checkbox) # ========================= def apply_proposals_dialog(): ctx = uno.getComponentContext() smgr = ctx.ServiceManager desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx) doc = desktop.getCurrentComponent() if not doc.supportsService("com.sun.star.sheet.SpreadsheetDocument"): log("Kein Calc-Dokument aktiv") return selection = doc.CurrentSelection sheet = doc.CurrentController.ActiveSheet # Prüfen ob eine Zelle ausgewählt ist if selection is None or not hasattr(selection, "getCellAddress"): log("Keine Zelle ausgewählt") return cell = selection # Spalte überprüfen header_row = sheet.getCellRangeByPosition(0,0,sheet.Columns.Count-1,0) objekt_col = None norm_vorschlag_col = None for col_idx in range(sheet.Columns.Count): val = sheet.getCellByPosition(col_idx,0).String if val.strip().lower() == "objektbeschreibung": objekt_col = col_idx elif val.strip().lower() == "norm_vorschlag": norm_vorschlag_col = col_idx if norm_vorschlag_col is None or objekt_col is None: log("Spalte 'Norm_Vorschlag' oder 'Objektbeschreibung' nicht gefunden") return # Vorschläge auslesen proposals_str = sheet.getCellByPosition(norm_vorschlag_col, cell.RangeAddress.StartRow).String if not proposals_str.strip(): log("Keine Vorschläge in der ausgewählten Zelle") return proposals = [p.strip() for p in proposals_str.split(";") if p.strip()] # Dialog erstellen toolkit = smgr.createInstanceWithContext("com.sun.star.awt.Toolkit", ctx) dialog_model = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialogModel", ctx) dialog_model.Width = 180 dialog_model.Height = 150 dialog_model.Title = "Vorschläge übernehmen" # ListBox lb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlListBoxModel") lb_model.Name = "ProposalList" lb_model.PositionX = 10 lb_model.PositionY = 10 lb_model.Width = 160 lb_model.Height = 80 lb_model.StringItemList = tuple(proposals) dialog_model.insertByName("ProposalList", lb_model) # Checkbox cb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlCheckBoxModel") cb_model.Name = "AllCheck" cb_model.PositionX = 10 cb_model.PositionY = 95 cb_model.Width = 160 cb_model.Height = 15 cb_model.Label = "Alle Vorschläge übernehmen" dialog_model.insertByName("AllCheck", cb_model) # OK-Button btn_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel") btn_model.Name = "OKButton" btn_model.PositionX = 10 btn_model.PositionY = 115 btn_model.Width = 80 btn_model.Height = 20 btn_model.Label = "OK" dialog_model.insertByName("OKButton", btn_model) # Abbrechen-Button cancel_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel") cancel_model.Name = "CancelButton" cancel_model.PositionX = 100 cancel_model.PositionY = 115 cancel_model.Width = 80 cancel_model.Height = 20 cancel_model.Label = "Abbrechen" dialog_model.insertByName("CancelButton", cancel_model) # Control Dialog dialog = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialog", ctx) dialog.setModel(dialog_model) dialog.setVisible(True) toolkit.createPeer(dialog, None) # Warten auf OK while True: import time time.sleep(0.1) # Prüfen auf Klick if dialog.getControl("OKButton").Pressed: all_flag = dialog.getControl("AllCheck").State == 1 selected_idx = dialog.getControl("ProposalList").SelectedItems if selected_idx: selected_proposal = proposals[selected_idx[0]] else: selected_proposal = None break elif dialog.getControl("CancelButton").Pressed: dialog.endExecute() return # Anwenden obj_cell = sheet.getCellByPosition(objekt_col, cell.RangeAddress.StartRow) obj_text = obj_cell.String if all_flag: for prop in proposals: idx = obj_text.lower().find(prop.lower()) if idx != -1: obj_text = obj_text[:idx] + prop + obj_text[idx+len(prop):] else: if selected_proposal: idx = obj_text.lower().find(selected_proposal.lower()) if idx != -1: obj_text = obj_text[:idx] + selected_proposal + obj_text[idx+len(selected_proposal):] obj_cell.String = obj_text obj_cell.CellBackColor = 0x00FF00 # grün dialog.endExecute() save_cache() log(f"Vorschlag übernommen: {obj_text}") # ========================= # Automatische Button-Registrierung # ========================= def register_toolbar_button(): ctx = uno.getComponentContext() smgr = ctx.ServiceManager desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx) doc = desktop.getCurrentComponent() frame = doc.CurrentController.Frame # Button kann manuell über Makro-Menü an Toolbar gebunden werden # Hier wird nur das Makro selbst registriert # Symbolleiste muss in LO einmalig erstellt werden # ========================= # Hauptmakro # ========================= def run_mapper_macro(): try: norm_dict = load_normvokabular(NV_MASTER_FILE) log(f"NV_MASTER geladen ({len(norm_dict)} Begriffe)") apply_proposals_dialog() except Exception as e: log(f"Fehler in run_mapper_macro: {e}")