diff --git a/mapper_macro_1.3.py b/mapper_macro_1.3.py new file mode 100644 index 0000000..1d42bd2 --- /dev/null +++ b/mapper_macro_1.3.py @@ -0,0 +1,297 @@ +# -*- coding: utf-8 -*- +import os +import uno +import unohelper +import re +import json +import pandas as pd +from pathlib import Path +from difflib import SequenceMatcher + +# RapidFuzz für Fuzzy-Suche +try: + from rapidfuzz import fuzz + RAPIDFUZZ_AVAILABLE = True +except ImportError: + RAPIDFUZZ_AVAILABLE = False + +# Spacy Lemmatizer +try: + import spacy + nlp = spacy.load("de_core_news_sm") + SPACY_AVAILABLE = True +except: + SPACY_AVAILABLE = False + nlp = None + +# ========================= +# Pfade & Config +# ========================= +SCRIPT_DIR = Path("/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro") +NV_MASTER_FILE = SCRIPT_DIR / "NV_MASTER.ods" +CACHE_FILE = SCRIPT_DIR / "mapper_cache.json" +LOG_FILE = SCRIPT_DIR / "mapper_log.txt" +STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"} + +# ========================= +# Cache & Logging +# ========================= +if CACHE_FILE.exists(): + with open(CACHE_FILE,"r",encoding="utf-8") as f: + CACHE = json.load(f) +else: + CACHE = {} + +def save_cache(): + with open(CACHE_FILE,"w",encoding="utf-8") as f: + json.dump(CACHE, f, indent=2, ensure_ascii=False) + +def log(msg): + with open(LOG_FILE,"a",encoding="utf-8") as f: + f.write(msg + "\n") + +# ========================= +# Textverarbeitung +# ========================= +def normalize_text(s): + if not s: return "" + s = str(s).lower().strip() + s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s) + s = re.sub(r"\s+"," ",s) + return s + +lemma_cache = {} +def lemmatize_term(term): + term_norm = normalize_text(term) + if term_norm in lemma_cache: + return lemma_cache[term_norm] + if SPACY_AVAILABLE and nlp: + doc = nlp(term_norm) + lemma = " ".join([token.lemma_ for token in doc]) + else: + lemma = term_norm + lemma_cache[term_norm] = lemma + return lemma + +def compound_split(term): + parts = re.findall(r'[A-ZÄÖÜa-zäöü]+', term) + return parts if parts else [term] + +# ========================= +# NV_MASTER laden +# ========================= +def load_normvokabular(file_path): + sheets = pd.read_excel(file_path, sheet_name=None, engine="odf") + norm_dict = {} + for sheet_name, df in sheets.items(): + df = df.dropna(how="all", axis=1) + df.columns = [str(c).strip() for c in df.columns] + if "ID" not in df.columns or "Wort/Vokabel" not in df.columns: + continue + current_parent_id = None + for _, row in df.iterrows(): + row_id = str(row["ID"]).strip() if pd.notna(row["ID"]) else None + row_word = str(row["Wort/Vokabel"]).strip() if pd.notna(row["Wort/Vokabel"]) else None + if row_id: current_parent_id = row_id + if not row_word: continue + norm_dict[normalize_text(row_word)] = { + "ID": current_parent_id, + "Wort/Vokabel": row_word + } + return norm_dict + +# ========================= +# Mapping +# ========================= +def map_term_with_indexes(term, norm_dict): + term_norm = normalize_text(term) + term_lemma = lemmatize_term(term_norm) + + # Cache prüfen + if term_lemma in CACHE: + cached = CACHE[term_lemma] + if isinstance(cached, dict) and all(k in cached for k in ("hits","suggestions","ids")): + return cached["hits"], cached["suggestions"], cached["ids"] + else: + CACHE.pop(term_lemma, None) + + hits = [] + suggestions = [] + ids = [] + + # Exakte Treffer + if term_norm in norm_dict: + e = norm_dict[term_norm] + hits.append(e["Wort/Vokabel"]) + ids.append(e["ID"]) + elif term_lemma in norm_dict: + e = norm_dict[term_lemma] + hits.append(e["Wort/Vokabel"]) + ids.append(e["ID"]) + else: + # Fuzzy Matching + for key, e in norm_dict.items(): + score = fuzz.token_sort_ratio(term_lemma, key)/100.0 if RAPIDFUZZ_AVAILABLE else SequenceMatcher(None, term_lemma, key).ratio() + if score >= 0.75: + suggestions.append(e["Wort/Vokabel"]) + ids.append(e["ID"]) + + CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids} + return hits, suggestions, ids + +# ========================= +# LibreOffice Dialog (ListBox + Checkbox) +# ========================= +def apply_proposals_dialog(): + ctx = uno.getComponentContext() + smgr = ctx.ServiceManager + desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx) + doc = desktop.getCurrentComponent() + if not doc.supportsService("com.sun.star.sheet.SpreadsheetDocument"): + log("Kein Calc-Dokument aktiv") + return + selection = doc.CurrentSelection + sheet = doc.CurrentController.ActiveSheet + + # Prüfen ob eine Zelle ausgewählt ist + if selection is None or not hasattr(selection, "getCellAddress"): + log("Keine Zelle ausgewählt") + return + cell = selection + + # Spalte überprüfen + header_row = sheet.getCellRangeByPosition(0,0,sheet.Columns.Count-1,0) + objekt_col = None + norm_vorschlag_col = None + for col_idx in range(sheet.Columns.Count): + val = sheet.getCellByPosition(col_idx,0).String + if val.strip().lower() == "objektbeschreibung": + objekt_col = col_idx + elif val.strip().lower() == "norm_vorschlag": + norm_vorschlag_col = col_idx + if norm_vorschlag_col is None or objekt_col is None: + log("Spalte 'Norm_Vorschlag' oder 'Objektbeschreibung' nicht gefunden") + return + + # Vorschläge auslesen + proposals_str = sheet.getCellByPosition(norm_vorschlag_col, cell.RangeAddress.StartRow).String + if not proposals_str.strip(): + log("Keine Vorschläge in der ausgewählten Zelle") + return + proposals = [p.strip() for p in proposals_str.split(";") if p.strip()] + + # Dialog erstellen + toolkit = smgr.createInstanceWithContext("com.sun.star.awt.Toolkit", ctx) + dialog_model = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialogModel", ctx) + dialog_model.Width = 180 + dialog_model.Height = 150 + dialog_model.Title = "Vorschläge übernehmen" + + # ListBox + lb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlListBoxModel") + lb_model.Name = "ProposalList" + lb_model.PositionX = 10 + lb_model.PositionY = 10 + lb_model.Width = 160 + lb_model.Height = 80 + lb_model.StringItemList = tuple(proposals) + dialog_model.insertByName("ProposalList", lb_model) + + # Checkbox + cb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlCheckBoxModel") + cb_model.Name = "AllCheck" + cb_model.PositionX = 10 + cb_model.PositionY = 95 + cb_model.Width = 160 + cb_model.Height = 15 + cb_model.Label = "Alle Vorschläge übernehmen" + dialog_model.insertByName("AllCheck", cb_model) + + # OK-Button + btn_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel") + btn_model.Name = "OKButton" + btn_model.PositionX = 10 + btn_model.PositionY = 115 + btn_model.Width = 80 + btn_model.Height = 20 + btn_model.Label = "OK" + dialog_model.insertByName("OKButton", btn_model) + + # Abbrechen-Button + cancel_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel") + cancel_model.Name = "CancelButton" + cancel_model.PositionX = 100 + cancel_model.PositionY = 115 + cancel_model.Width = 80 + cancel_model.Height = 20 + cancel_model.Label = "Abbrechen" + dialog_model.insertByName("CancelButton", cancel_model) + + # Control Dialog + dialog = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialog", ctx) + dialog.setModel(dialog_model) + dialog.setVisible(True) + toolkit.createPeer(dialog, None) + + # Warten auf OK + while True: + import time + time.sleep(0.1) + # Prüfen auf Klick + if dialog.getControl("OKButton").Pressed: + all_flag = dialog.getControl("AllCheck").State == 1 + selected_idx = dialog.getControl("ProposalList").SelectedItems + if selected_idx: + selected_proposal = proposals[selected_idx[0]] + else: + selected_proposal = None + break + elif dialog.getControl("CancelButton").Pressed: + dialog.endExecute() + return + + # Anwenden + obj_cell = sheet.getCellByPosition(objekt_col, cell.RangeAddress.StartRow) + obj_text = obj_cell.String + if all_flag: + for prop in proposals: + idx = obj_text.lower().find(prop.lower()) + if idx != -1: + obj_text = obj_text[:idx] + prop + obj_text[idx+len(prop):] + else: + if selected_proposal: + idx = obj_text.lower().find(selected_proposal.lower()) + if idx != -1: + obj_text = obj_text[:idx] + selected_proposal + obj_text[idx+len(selected_proposal):] + + obj_cell.String = obj_text + obj_cell.CellBackColor = 0x00FF00 # grün + dialog.endExecute() + save_cache() + log(f"Vorschlag übernommen: {obj_text}") + +# ========================= +# Automatische Button-Registrierung +# ========================= +def register_toolbar_button(): + ctx = uno.getComponentContext() + smgr = ctx.ServiceManager + desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx) + doc = desktop.getCurrentComponent() + frame = doc.CurrentController.Frame + # Button kann manuell über Makro-Menü an Toolbar gebunden werden + # Hier wird nur das Makro selbst registriert + # Symbolleiste muss in LO einmalig erstellt werden + +# ========================= +# Hauptmakro +# ========================= +def run_mapper_macro(): + try: + norm_dict = load_normvokabular(NV_MASTER_FILE) + log(f"NV_MASTER geladen ({len(norm_dict)} Begriffe)") + + apply_proposals_dialog() + + except Exception as e: + log(f"Fehler in run_mapper_macro: {e}")