Delete mapper_macro_1.3.py
This commit is contained in:
parent
9053559b56
commit
fb30fcd877
@ -1,297 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
import os
|
|
||||||
import uno
|
|
||||||
import unohelper
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
import pandas as pd
|
|
||||||
from pathlib import Path
|
|
||||||
from difflib import SequenceMatcher
|
|
||||||
|
|
||||||
# RapidFuzz für Fuzzy-Suche
|
|
||||||
try:
|
|
||||||
from rapidfuzz import fuzz
|
|
||||||
RAPIDFUZZ_AVAILABLE = True
|
|
||||||
except ImportError:
|
|
||||||
RAPIDFUZZ_AVAILABLE = False
|
|
||||||
|
|
||||||
# Spacy Lemmatizer
|
|
||||||
try:
|
|
||||||
import spacy
|
|
||||||
nlp = spacy.load("de_core_news_sm")
|
|
||||||
SPACY_AVAILABLE = True
|
|
||||||
except:
|
|
||||||
SPACY_AVAILABLE = False
|
|
||||||
nlp = None
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Pfade & Config
|
|
||||||
# =========================
|
|
||||||
SCRIPT_DIR = Path("/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro")
|
|
||||||
NV_MASTER_FILE = SCRIPT_DIR / "NV_MASTER.ods"
|
|
||||||
CACHE_FILE = SCRIPT_DIR / "mapper_cache.json"
|
|
||||||
LOG_FILE = SCRIPT_DIR / "mapper_log.txt"
|
|
||||||
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Cache & Logging
|
|
||||||
# =========================
|
|
||||||
if CACHE_FILE.exists():
|
|
||||||
with open(CACHE_FILE,"r",encoding="utf-8") as f:
|
|
||||||
CACHE = json.load(f)
|
|
||||||
else:
|
|
||||||
CACHE = {}
|
|
||||||
|
|
||||||
def save_cache():
|
|
||||||
with open(CACHE_FILE,"w",encoding="utf-8") as f:
|
|
||||||
json.dump(CACHE, f, indent=2, ensure_ascii=False)
|
|
||||||
|
|
||||||
def log(msg):
|
|
||||||
with open(LOG_FILE,"a",encoding="utf-8") as f:
|
|
||||||
f.write(msg + "\n")
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Textverarbeitung
|
|
||||||
# =========================
|
|
||||||
def normalize_text(s):
|
|
||||||
if not s: return ""
|
|
||||||
s = str(s).lower().strip()
|
|
||||||
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
|
|
||||||
s = re.sub(r"\s+"," ",s)
|
|
||||||
return s
|
|
||||||
|
|
||||||
lemma_cache = {}
|
|
||||||
def lemmatize_term(term):
|
|
||||||
term_norm = normalize_text(term)
|
|
||||||
if term_norm in lemma_cache:
|
|
||||||
return lemma_cache[term_norm]
|
|
||||||
if SPACY_AVAILABLE and nlp:
|
|
||||||
doc = nlp(term_norm)
|
|
||||||
lemma = " ".join([token.lemma_ for token in doc])
|
|
||||||
else:
|
|
||||||
lemma = term_norm
|
|
||||||
lemma_cache[term_norm] = lemma
|
|
||||||
return lemma
|
|
||||||
|
|
||||||
def compound_split(term):
|
|
||||||
parts = re.findall(r'[A-ZÄÖÜa-zäöü]+', term)
|
|
||||||
return parts if parts else [term]
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# NV_MASTER laden
|
|
||||||
# =========================
|
|
||||||
def load_normvokabular(file_path):
|
|
||||||
sheets = pd.read_excel(file_path, sheet_name=None, engine="odf")
|
|
||||||
norm_dict = {}
|
|
||||||
for sheet_name, df in sheets.items():
|
|
||||||
df = df.dropna(how="all", axis=1)
|
|
||||||
df.columns = [str(c).strip() for c in df.columns]
|
|
||||||
if "ID" not in df.columns or "Wort/Vokabel" not in df.columns:
|
|
||||||
continue
|
|
||||||
current_parent_id = None
|
|
||||||
for _, row in df.iterrows():
|
|
||||||
row_id = str(row["ID"]).strip() if pd.notna(row["ID"]) else None
|
|
||||||
row_word = str(row["Wort/Vokabel"]).strip() if pd.notna(row["Wort/Vokabel"]) else None
|
|
||||||
if row_id: current_parent_id = row_id
|
|
||||||
if not row_word: continue
|
|
||||||
norm_dict[normalize_text(row_word)] = {
|
|
||||||
"ID": current_parent_id,
|
|
||||||
"Wort/Vokabel": row_word
|
|
||||||
}
|
|
||||||
return norm_dict
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Mapping
|
|
||||||
# =========================
|
|
||||||
def map_term_with_indexes(term, norm_dict):
|
|
||||||
term_norm = normalize_text(term)
|
|
||||||
term_lemma = lemmatize_term(term_norm)
|
|
||||||
|
|
||||||
# Cache prüfen
|
|
||||||
if term_lemma in CACHE:
|
|
||||||
cached = CACHE[term_lemma]
|
|
||||||
if isinstance(cached, dict) and all(k in cached for k in ("hits","suggestions","ids")):
|
|
||||||
return cached["hits"], cached["suggestions"], cached["ids"]
|
|
||||||
else:
|
|
||||||
CACHE.pop(term_lemma, None)
|
|
||||||
|
|
||||||
hits = []
|
|
||||||
suggestions = []
|
|
||||||
ids = []
|
|
||||||
|
|
||||||
# Exakte Treffer
|
|
||||||
if term_norm in norm_dict:
|
|
||||||
e = norm_dict[term_norm]
|
|
||||||
hits.append(e["Wort/Vokabel"])
|
|
||||||
ids.append(e["ID"])
|
|
||||||
elif term_lemma in norm_dict:
|
|
||||||
e = norm_dict[term_lemma]
|
|
||||||
hits.append(e["Wort/Vokabel"])
|
|
||||||
ids.append(e["ID"])
|
|
||||||
else:
|
|
||||||
# Fuzzy Matching
|
|
||||||
for key, e in norm_dict.items():
|
|
||||||
score = fuzz.token_sort_ratio(term_lemma, key)/100.0 if RAPIDFUZZ_AVAILABLE else SequenceMatcher(None, term_lemma, key).ratio()
|
|
||||||
if score >= 0.75:
|
|
||||||
suggestions.append(e["Wort/Vokabel"])
|
|
||||||
ids.append(e["ID"])
|
|
||||||
|
|
||||||
CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}
|
|
||||||
return hits, suggestions, ids
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# LibreOffice Dialog (ListBox + Checkbox)
|
|
||||||
# =========================
|
|
||||||
def apply_proposals_dialog():
|
|
||||||
ctx = uno.getComponentContext()
|
|
||||||
smgr = ctx.ServiceManager
|
|
||||||
desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
|
|
||||||
doc = desktop.getCurrentComponent()
|
|
||||||
if not doc.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
|
|
||||||
log("Kein Calc-Dokument aktiv")
|
|
||||||
return
|
|
||||||
selection = doc.CurrentSelection
|
|
||||||
sheet = doc.CurrentController.ActiveSheet
|
|
||||||
|
|
||||||
# Prüfen ob eine Zelle ausgewählt ist
|
|
||||||
if selection is None or not hasattr(selection, "getCellAddress"):
|
|
||||||
log("Keine Zelle ausgewählt")
|
|
||||||
return
|
|
||||||
cell = selection
|
|
||||||
|
|
||||||
# Spalte überprüfen
|
|
||||||
header_row = sheet.getCellRangeByPosition(0,0,sheet.Columns.Count-1,0)
|
|
||||||
objekt_col = None
|
|
||||||
norm_vorschlag_col = None
|
|
||||||
for col_idx in range(sheet.Columns.Count):
|
|
||||||
val = sheet.getCellByPosition(col_idx,0).String
|
|
||||||
if val.strip().lower() == "objektbeschreibung":
|
|
||||||
objekt_col = col_idx
|
|
||||||
elif val.strip().lower() == "norm_vorschlag":
|
|
||||||
norm_vorschlag_col = col_idx
|
|
||||||
if norm_vorschlag_col is None or objekt_col is None:
|
|
||||||
log("Spalte 'Norm_Vorschlag' oder 'Objektbeschreibung' nicht gefunden")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Vorschläge auslesen
|
|
||||||
proposals_str = sheet.getCellByPosition(norm_vorschlag_col, cell.RangeAddress.StartRow).String
|
|
||||||
if not proposals_str.strip():
|
|
||||||
log("Keine Vorschläge in der ausgewählten Zelle")
|
|
||||||
return
|
|
||||||
proposals = [p.strip() for p in proposals_str.split(";") if p.strip()]
|
|
||||||
|
|
||||||
# Dialog erstellen
|
|
||||||
toolkit = smgr.createInstanceWithContext("com.sun.star.awt.Toolkit", ctx)
|
|
||||||
dialog_model = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialogModel", ctx)
|
|
||||||
dialog_model.Width = 180
|
|
||||||
dialog_model.Height = 150
|
|
||||||
dialog_model.Title = "Vorschläge übernehmen"
|
|
||||||
|
|
||||||
# ListBox
|
|
||||||
lb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlListBoxModel")
|
|
||||||
lb_model.Name = "ProposalList"
|
|
||||||
lb_model.PositionX = 10
|
|
||||||
lb_model.PositionY = 10
|
|
||||||
lb_model.Width = 160
|
|
||||||
lb_model.Height = 80
|
|
||||||
lb_model.StringItemList = tuple(proposals)
|
|
||||||
dialog_model.insertByName("ProposalList", lb_model)
|
|
||||||
|
|
||||||
# Checkbox
|
|
||||||
cb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlCheckBoxModel")
|
|
||||||
cb_model.Name = "AllCheck"
|
|
||||||
cb_model.PositionX = 10
|
|
||||||
cb_model.PositionY = 95
|
|
||||||
cb_model.Width = 160
|
|
||||||
cb_model.Height = 15
|
|
||||||
cb_model.Label = "Alle Vorschläge übernehmen"
|
|
||||||
dialog_model.insertByName("AllCheck", cb_model)
|
|
||||||
|
|
||||||
# OK-Button
|
|
||||||
btn_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
|
|
||||||
btn_model.Name = "OKButton"
|
|
||||||
btn_model.PositionX = 10
|
|
||||||
btn_model.PositionY = 115
|
|
||||||
btn_model.Width = 80
|
|
||||||
btn_model.Height = 20
|
|
||||||
btn_model.Label = "OK"
|
|
||||||
dialog_model.insertByName("OKButton", btn_model)
|
|
||||||
|
|
||||||
# Abbrechen-Button
|
|
||||||
cancel_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
|
|
||||||
cancel_model.Name = "CancelButton"
|
|
||||||
cancel_model.PositionX = 100
|
|
||||||
cancel_model.PositionY = 115
|
|
||||||
cancel_model.Width = 80
|
|
||||||
cancel_model.Height = 20
|
|
||||||
cancel_model.Label = "Abbrechen"
|
|
||||||
dialog_model.insertByName("CancelButton", cancel_model)
|
|
||||||
|
|
||||||
# Control Dialog
|
|
||||||
dialog = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialog", ctx)
|
|
||||||
dialog.setModel(dialog_model)
|
|
||||||
dialog.setVisible(True)
|
|
||||||
toolkit.createPeer(dialog, None)
|
|
||||||
|
|
||||||
# Warten auf OK
|
|
||||||
while True:
|
|
||||||
import time
|
|
||||||
time.sleep(0.1)
|
|
||||||
# Prüfen auf Klick
|
|
||||||
if dialog.getControl("OKButton").Pressed:
|
|
||||||
all_flag = dialog.getControl("AllCheck").State == 1
|
|
||||||
selected_idx = dialog.getControl("ProposalList").SelectedItems
|
|
||||||
if selected_idx:
|
|
||||||
selected_proposal = proposals[selected_idx[0]]
|
|
||||||
else:
|
|
||||||
selected_proposal = None
|
|
||||||
break
|
|
||||||
elif dialog.getControl("CancelButton").Pressed:
|
|
||||||
dialog.endExecute()
|
|
||||||
return
|
|
||||||
|
|
||||||
# Anwenden
|
|
||||||
obj_cell = sheet.getCellByPosition(objekt_col, cell.RangeAddress.StartRow)
|
|
||||||
obj_text = obj_cell.String
|
|
||||||
if all_flag:
|
|
||||||
for prop in proposals:
|
|
||||||
idx = obj_text.lower().find(prop.lower())
|
|
||||||
if idx != -1:
|
|
||||||
obj_text = obj_text[:idx] + prop + obj_text[idx+len(prop):]
|
|
||||||
else:
|
|
||||||
if selected_proposal:
|
|
||||||
idx = obj_text.lower().find(selected_proposal.lower())
|
|
||||||
if idx != -1:
|
|
||||||
obj_text = obj_text[:idx] + selected_proposal + obj_text[idx+len(selected_proposal):]
|
|
||||||
|
|
||||||
obj_cell.String = obj_text
|
|
||||||
obj_cell.CellBackColor = 0x00FF00 # grün
|
|
||||||
dialog.endExecute()
|
|
||||||
save_cache()
|
|
||||||
log(f"Vorschlag übernommen: {obj_text}")
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Automatische Button-Registrierung
|
|
||||||
# =========================
|
|
||||||
def register_toolbar_button():
|
|
||||||
ctx = uno.getComponentContext()
|
|
||||||
smgr = ctx.ServiceManager
|
|
||||||
desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
|
|
||||||
doc = desktop.getCurrentComponent()
|
|
||||||
frame = doc.CurrentController.Frame
|
|
||||||
# Button kann manuell über Makro-Menü an Toolbar gebunden werden
|
|
||||||
# Hier wird nur das Makro selbst registriert
|
|
||||||
# Symbolleiste muss in LO einmalig erstellt werden
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Hauptmakro
|
|
||||||
# =========================
|
|
||||||
def run_mapper_macro():
|
|
||||||
try:
|
|
||||||
norm_dict = load_normvokabular(NV_MASTER_FILE)
|
|
||||||
log(f"NV_MASTER geladen ({len(norm_dict)} Begriffe)")
|
|
||||||
|
|
||||||
apply_proposals_dialog()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
log(f"Fehler in run_mapper_macro: {e}")
|
|
||||||
Loading…
x
Reference in New Issue
Block a user