GND_Skript_Test/mapper_macro_1.3.py
2025-10-16 08:26:50 +00:00

298 lines
9.7 KiB
Python

# -*- coding: utf-8 -*-
import os
import uno
import unohelper
import re
import json
import pandas as pd
from pathlib import Path
from difflib import SequenceMatcher
# RapidFuzz für Fuzzy-Suche
try:
from rapidfuzz import fuzz
RAPIDFUZZ_AVAILABLE = True
except ImportError:
RAPIDFUZZ_AVAILABLE = False
# Spacy Lemmatizer
try:
import spacy
nlp = spacy.load("de_core_news_sm")
SPACY_AVAILABLE = True
except:
SPACY_AVAILABLE = False
nlp = None
# =========================
# Pfade & Config
# =========================
SCRIPT_DIR = Path("/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro")
NV_MASTER_FILE = SCRIPT_DIR / "NV_MASTER.ods"
CACHE_FILE = SCRIPT_DIR / "mapper_cache.json"
LOG_FILE = SCRIPT_DIR / "mapper_log.txt"
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
# =========================
# Cache & Logging
# =========================
if CACHE_FILE.exists():
with open(CACHE_FILE,"r",encoding="utf-8") as f:
CACHE = json.load(f)
else:
CACHE = {}
def save_cache():
with open(CACHE_FILE,"w",encoding="utf-8") as f:
json.dump(CACHE, f, indent=2, ensure_ascii=False)
def log(msg):
with open(LOG_FILE,"a",encoding="utf-8") as f:
f.write(msg + "\n")
# =========================
# Textverarbeitung
# =========================
def normalize_text(s):
if not s: return ""
s = str(s).lower().strip()
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
s = re.sub(r"\s+"," ",s)
return s
lemma_cache = {}
def lemmatize_term(term):
term_norm = normalize_text(term)
if term_norm in lemma_cache:
return lemma_cache[term_norm]
if SPACY_AVAILABLE and nlp:
doc = nlp(term_norm)
lemma = " ".join([token.lemma_ for token in doc])
else:
lemma = term_norm
lemma_cache[term_norm] = lemma
return lemma
def compound_split(term):
parts = re.findall(r'[A-ZÄÖÜa-zäöü]+', term)
return parts if parts else [term]
# =========================
# NV_MASTER laden
# =========================
def load_normvokabular(file_path):
sheets = pd.read_excel(file_path, sheet_name=None, engine="odf")
norm_dict = {}
for sheet_name, df in sheets.items():
df = df.dropna(how="all", axis=1)
df.columns = [str(c).strip() for c in df.columns]
if "ID" not in df.columns or "Wort/Vokabel" not in df.columns:
continue
current_parent_id = None
for _, row in df.iterrows():
row_id = str(row["ID"]).strip() if pd.notna(row["ID"]) else None
row_word = str(row["Wort/Vokabel"]).strip() if pd.notna(row["Wort/Vokabel"]) else None
if row_id: current_parent_id = row_id
if not row_word: continue
norm_dict[normalize_text(row_word)] = {
"ID": current_parent_id,
"Wort/Vokabel": row_word
}
return norm_dict
# =========================
# Mapping
# =========================
def map_term_with_indexes(term, norm_dict):
term_norm = normalize_text(term)
term_lemma = lemmatize_term(term_norm)
# Cache prüfen
if term_lemma in CACHE:
cached = CACHE[term_lemma]
if isinstance(cached, dict) and all(k in cached for k in ("hits","suggestions","ids")):
return cached["hits"], cached["suggestions"], cached["ids"]
else:
CACHE.pop(term_lemma, None)
hits = []
suggestions = []
ids = []
# Exakte Treffer
if term_norm in norm_dict:
e = norm_dict[term_norm]
hits.append(e["Wort/Vokabel"])
ids.append(e["ID"])
elif term_lemma in norm_dict:
e = norm_dict[term_lemma]
hits.append(e["Wort/Vokabel"])
ids.append(e["ID"])
else:
# Fuzzy Matching
for key, e in norm_dict.items():
score = fuzz.token_sort_ratio(term_lemma, key)/100.0 if RAPIDFUZZ_AVAILABLE else SequenceMatcher(None, term_lemma, key).ratio()
if score >= 0.75:
suggestions.append(e["Wort/Vokabel"])
ids.append(e["ID"])
CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}
return hits, suggestions, ids
# =========================
# LibreOffice Dialog (ListBox + Checkbox)
# =========================
def apply_proposals_dialog():
ctx = uno.getComponentContext()
smgr = ctx.ServiceManager
desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
doc = desktop.getCurrentComponent()
if not doc.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
log("Kein Calc-Dokument aktiv")
return
selection = doc.CurrentSelection
sheet = doc.CurrentController.ActiveSheet
# Prüfen ob eine Zelle ausgewählt ist
if selection is None or not hasattr(selection, "getCellAddress"):
log("Keine Zelle ausgewählt")
return
cell = selection
# Spalte überprüfen
header_row = sheet.getCellRangeByPosition(0,0,sheet.Columns.Count-1,0)
objekt_col = None
norm_vorschlag_col = None
for col_idx in range(sheet.Columns.Count):
val = sheet.getCellByPosition(col_idx,0).String
if val.strip().lower() == "objektbeschreibung":
objekt_col = col_idx
elif val.strip().lower() == "norm_vorschlag":
norm_vorschlag_col = col_idx
if norm_vorschlag_col is None or objekt_col is None:
log("Spalte 'Norm_Vorschlag' oder 'Objektbeschreibung' nicht gefunden")
return
# Vorschläge auslesen
proposals_str = sheet.getCellByPosition(norm_vorschlag_col, cell.RangeAddress.StartRow).String
if not proposals_str.strip():
log("Keine Vorschläge in der ausgewählten Zelle")
return
proposals = [p.strip() for p in proposals_str.split(";") if p.strip()]
# Dialog erstellen
toolkit = smgr.createInstanceWithContext("com.sun.star.awt.Toolkit", ctx)
dialog_model = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialogModel", ctx)
dialog_model.Width = 180
dialog_model.Height = 150
dialog_model.Title = "Vorschläge übernehmen"
# ListBox
lb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlListBoxModel")
lb_model.Name = "ProposalList"
lb_model.PositionX = 10
lb_model.PositionY = 10
lb_model.Width = 160
lb_model.Height = 80
lb_model.StringItemList = tuple(proposals)
dialog_model.insertByName("ProposalList", lb_model)
# Checkbox
cb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlCheckBoxModel")
cb_model.Name = "AllCheck"
cb_model.PositionX = 10
cb_model.PositionY = 95
cb_model.Width = 160
cb_model.Height = 15
cb_model.Label = "Alle Vorschläge übernehmen"
dialog_model.insertByName("AllCheck", cb_model)
# OK-Button
btn_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
btn_model.Name = "OKButton"
btn_model.PositionX = 10
btn_model.PositionY = 115
btn_model.Width = 80
btn_model.Height = 20
btn_model.Label = "OK"
dialog_model.insertByName("OKButton", btn_model)
# Abbrechen-Button
cancel_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
cancel_model.Name = "CancelButton"
cancel_model.PositionX = 100
cancel_model.PositionY = 115
cancel_model.Width = 80
cancel_model.Height = 20
cancel_model.Label = "Abbrechen"
dialog_model.insertByName("CancelButton", cancel_model)
# Control Dialog
dialog = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialog", ctx)
dialog.setModel(dialog_model)
dialog.setVisible(True)
toolkit.createPeer(dialog, None)
# Warten auf OK
while True:
import time
time.sleep(0.1)
# Prüfen auf Klick
if dialog.getControl("OKButton").Pressed:
all_flag = dialog.getControl("AllCheck").State == 1
selected_idx = dialog.getControl("ProposalList").SelectedItems
if selected_idx:
selected_proposal = proposals[selected_idx[0]]
else:
selected_proposal = None
break
elif dialog.getControl("CancelButton").Pressed:
dialog.endExecute()
return
# Anwenden
obj_cell = sheet.getCellByPosition(objekt_col, cell.RangeAddress.StartRow)
obj_text = obj_cell.String
if all_flag:
for prop in proposals:
idx = obj_text.lower().find(prop.lower())
if idx != -1:
obj_text = obj_text[:idx] + prop + obj_text[idx+len(prop):]
else:
if selected_proposal:
idx = obj_text.lower().find(selected_proposal.lower())
if idx != -1:
obj_text = obj_text[:idx] + selected_proposal + obj_text[idx+len(selected_proposal):]
obj_cell.String = obj_text
obj_cell.CellBackColor = 0x00FF00 # grün
dialog.endExecute()
save_cache()
log(f"Vorschlag übernommen: {obj_text}")
# =========================
# Automatische Button-Registrierung
# =========================
def register_toolbar_button():
ctx = uno.getComponentContext()
smgr = ctx.ServiceManager
desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
doc = desktop.getCurrentComponent()
frame = doc.CurrentController.Frame
# Button kann manuell über Makro-Menü an Toolbar gebunden werden
# Hier wird nur das Makro selbst registriert
# Symbolleiste muss in LO einmalig erstellt werden
# =========================
# Hauptmakro
# =========================
def run_mapper_macro():
try:
norm_dict = load_normvokabular(NV_MASTER_FILE)
log(f"NV_MASTER geladen ({len(norm_dict)} Begriffe)")
apply_proposals_dialog()
except Exception as e:
log(f"Fehler in run_mapper_macro: {e}")