Jonas Arnold internship: added all files
parent 41e8b7103e
commit 723ac7b6b1
File diff suppressed because it is too large
BIN  Box Ha-Ho.ods  (binary file not shown)
BIN  Box Ha-Klinc.ods  (binary file not shown)
BIN  Box Hu-J.ods  (binary file not shown)

Deleted line (LibreOffice lock entry):
@@ -1 +0,0 @@
-,jarnold,workPC,10.10.2025 09:26,file:///home/jarnold/.config/libreoffice/4;
469  Mapper_Makro_Alte_Versionen/mapper_macro_1.4.py  Normal file
@@ -0,0 +1,469 @@
# -*- coding: utf-8 -*-
# mapper_macro 1.5 - LibreOffice Calc
# Features: Kompositum-Split, Cache, Live-Vorschläge nur auf 'Objektbeschreibung', Logging

import os
import re
import json
import datetime

# optional imports (Pandas, Spacy, RapidFuzz)
try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except Exception:
    PANDAS_AVAILABLE = False

try:
    import spacy
    nlp = spacy.load("de_core_news_sm")
    SPACY_AVAILABLE = True
except Exception:
    SPACY_AVAILABLE = False
    nlp = None

try:
    from rapidfuzz import fuzz
    RAPIDFUZZ_AVAILABLE = True
except Exception:
    RAPIDFUZZ_AVAILABLE = False
    from difflib import SequenceMatcher

# ------------------------
# Konfiguration
# ------------------------
BASE_DIR = "/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro"
NV_MASTER_PATH = os.path.join(BASE_DIR, "NV_MASTER.ods")
CACHE_FILE = os.path.join(BASE_DIR, "mapper_cache.json")
LOG_FILE = os.path.join(BASE_DIR, "mapper_macro.log")

STOPWORDS = {
    "mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an",
    "als","bei","für","aus","dem","den","des","eines","einer"
}
CONF_THRESHOLD = 0.75

# ------------------------
# Logging
# ------------------------
def log(msg, level="INFO"):
    ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{ts}] [{level}] {msg}\n"
    try:
        os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(line)
    except Exception:
        pass

# ------------------------
# Cache laden
# ------------------------
try:
    if os.path.exists(CACHE_FILE):
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            CACHE = json.load(f)
    else:
        CACHE = {}
except Exception as e:
    CACHE = {}
    log(f"Fehler beim Laden des Caches: {e}", level="ERROR")

# ------------------------
# Textnormalisierung & Lemma
# ------------------------
lemma_cache = {}

def normalize_text(s):
    if not s:
        return ""
    s = str(s).strip().lower()
    s = re.sub(r"[\(\)\[\]\"'\\;:\?!,\.]", "", s)
    s = re.sub(r"\s+", " ", s)
    return s

def lemmatize_term(term):
    term_norm = normalize_text(term)
    if term_norm in lemma_cache:
        return lemma_cache[term_norm]
    if SPACY_AVAILABLE and nlp:
        try:
            doc = nlp(term_norm)
            lemma = " ".join([t.lemma_ for t in doc])
        except Exception:
            lemma = term_norm
    else:
        lemma = term_norm
    lemma_cache[term_norm] = lemma
    return lemma

# ------------------------
# Kompositum-Splitting
# ------------------------
def compound_split(term):
    if not term:
        return []
    parts = re.findall(r'[A-ZÄÖÜ][a-zäöü]+', term)
    if parts:
        return parts
    parts = [p for p in re.split(r'[-\s]+', term) if p]
    return parts or [term]

# ------------------------
# NV_MASTER indexieren
# ------------------------
def build_norm_index(nv_path):
    norm_dict = {}
    lemma_index = {}
    if not PANDAS_AVAILABLE:
        log("Pandas nicht verfügbar, NV_MASTER kann nicht gelesen werden.", level="ERROR")
        return norm_dict, lemma_index
    try:
        sheets = pd.read_excel(nv_path, sheet_name=None, engine="odf")
    except Exception as e:
        log(f"Fehler beim Einlesen von NV_MASTER: {e}", level="ERROR")
        return norm_dict, lemma_index

    for sheet_name, df in sheets.items():
        if str(sheet_name).strip().lower() == "master":
            continue
        df = df.fillna("")
        cols = [str(c).strip().lower() for c in df.columns]
        id_col = None
        word_col = None
        for i, c in enumerate(cols):
            if "id" in c:
                id_col = df.columns[i]
            if "wort" in c or "vokabel" in c:
                word_col = df.columns[i]
        if word_col is None and len(df.columns) >= 1:
            word_col = df.columns[-1]
        if id_col is None and len(df.columns) >= 1:
            id_col = df.columns[0]

        current_parent_id = None
        for _, row in df.iterrows():
            id_val = str(row[id_col]).strip() if id_col in df.columns else ""
            word_val = str(row[word_col]).strip() if word_col in df.columns else ""
            if id_val:
                current_parent_id = id_val
            if not word_val:
                continue
            norm_name = normalize_text(word_val)
            lemma = lemmatize_term(word_val)
            entry = {"Name": word_val.strip(), "ID": current_parent_id or "", "Sheet": sheet_name}
            norm_dict.setdefault(norm_name, []).append(entry)
            lemma_index.setdefault(lemma, []).append(entry)

    log(f"NV_MASTER geladen. Begriffe: {sum(len(v) for v in norm_dict.values())}")
    return norm_dict, lemma_index

# ------------------------
# Fuzzy / Vorschläge
# ------------------------
def fuzzy_score(a, b):
    if RAPIDFUZZ_AVAILABLE:
        try:
            return fuzz.token_set_ratio(a, b) / 100.0
        except Exception:
            return 0.0
    else:
        return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def get_suggestions_for_term(term_lemma, norm_dict, lemma_index, threshold=CONF_THRESHOLD):
    candidates = []
    for key_lemma, entries in lemma_index.items():
        score = fuzzy_score(term_lemma, key_lemma)
        if key_lemma.startswith(term_lemma):
            score = min(score + 0.1, 1.0)
        if score >= threshold:
            for e in entries:
                candidates.append((score, e["Name"], e["ID"]))
    for norm_key, entries in norm_dict.items():
        score = fuzzy_score(term_lemma, norm_key)
        if norm_key.startswith(term_lemma):
            score = min(score + 0.1, 1.0)
        if score >= threshold:
            for e in entries:
                candidates.append((score, e["Name"], e["ID"]))
    candidates.sort(key=lambda t: t[0], reverse=True)
    seen = set()
    results = []
    for score, name, id_ in candidates:
        key = (name, id_)
        if key in seen:
            continue
        seen.add(key)
        results.append({"score": score, "name": name, "id": id_})
    return [f'{r["name"]} ({r["id"]})' if r["id"] else r["name"] for r in results]

# ------------------------
# Mapping eines Terms (mit Cache)
# ------------------------
def map_term_with_indexes(term, norm_dict, lemma_index):
    term_norm = normalize_text(term)
    term_lemma = lemmatize_term(term)
    if term_lemma in CACHE:
        c = CACHE[term_lemma]
        return c.get("hits", []), c.get("suggestions", []), c.get("ids", [])

    hits = []
    suggestions = []
    ids = []

    if term_norm in norm_dict:
        for e in norm_dict[term_norm]:
            hits.append(e["Name"])
            if e["ID"]:
                ids.append(e["ID"])
    if not hits and term_lemma in lemma_index:
        for e in lemma_index[term_lemma]:
            hits.append(e["Name"])
            if e["ID"]:
                ids.append(e["ID"])
    suggestions = get_suggestions_for_term(term_lemma, norm_dict, lemma_index)

    if not hits:
        tokens = compound_split(term)
        for t in tokens:
            t_lemma = lemmatize_term(t)
            if t_lemma in lemma_index:
                for e in lemma_index[t_lemma]:
                    hits.append(e["Name"])
                    if e["ID"]:
                        ids.append(e["ID"])
            else:
                suggestions.extend(get_suggestions_for_term(t_lemma, norm_dict, lemma_index))

    def uniq(seq):
        seen = set()
        out = []
        for x in seq:
            if x not in seen:
                seen.add(x)
                out.append(x)
        return out

    hits = uniq(hits)
    suggestions = uniq(suggestions)
    ids = uniq(ids)

    CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}
    return hits, suggestions, ids

# ------------------------
# Header + Spalten
# ------------------------
def find_header_and_cols(sheet):
    try:
        cursor = sheet.createCursor()
        cursor.gotoStartOfUsedArea(False)
        cursor.gotoEndOfUsedArea(True)
        dr = cursor.getRangeAddress()
    except Exception:
        return None, None, None, {}
    header_row = None
    objekt_col = None
    for r in range(0, min(5, dr.EndRow + 1)):
        for c in range(0, dr.EndColumn + 1):
            try:
                val = str(sheet.getCellByPosition(c, r).String).strip().lower()
            except Exception:
                val = ""
            if val == "objektbeschreibung":
                header_row = r
                objekt_col = c
                break
        if objekt_col is not None:
            break

    if header_row is None:
        return None, None, dr, {}
    existing = {}
    for c in range(0, dr.EndColumn + 1):
        try:
            h = str(sheet.getCellByPosition(c, header_row).String).strip()
        except Exception:
            h = ""
        if h == "Norm_Treffer":
            existing["Norm_Treffer"] = c
        if h == "Norm_Vorschlag":
            existing["Norm_Vorschlag"] = c
        if h == "Norm_ID":
            existing["Norm_ID"] = c
    return header_row, objekt_col, dr, existing

# ------------------------
# Optimierter Live-Handler (nur Objektbeschreibung)
# ------------------------
def on_objektbeschreibung_change(oEvent=None):
    try:
        doc = XSCRIPTCONTEXT.getDocument()
        sheet = doc.CurrentController.ActiveSheet
    except Exception as e:
        log(f"Dokumentzugriff fehlgeschlagen: {e}", level="ERROR")
        return

    cell = None
    try:
        if oEvent and hasattr(oEvent, "Range") and oEvent.Range is not None:
            cell = oEvent.Range
        elif oEvent and hasattr(oEvent, "Source") and oEvent.Source is not None:
            cell = oEvent.Source
    except Exception:
        cell = None
    if cell is None:
        try:
            sel = doc.CurrentSelection
            if hasattr(sel, "getCellByPosition"):
                cell = sel
            else:
                cell = sel.getCellByPosition(0, 0)
        except Exception as e:
            log(f"Keine Selektion: {e}", level="ERROR")
            return

    try:
        row_index = cell.CellAddress.Row
        col_index = cell.CellAddress.Column
    except Exception:
        return

    try:
        header_row, objekt_col, dr, existing = find_header_and_cols(sheet)
        if header_row is None or col_index != objekt_col:
            return  # nur die Objektbeschreibung-Spalte bearbeiten
        last_col = dr.EndColumn
        if "Norm_Vorschlag" not in existing:
            last_col += 1
            existing["Norm_Vorschlag"] = last_col
            sheet.getCellByPosition(last_col, header_row).String = "Norm_Vorschlag"
        norm_sug_col = existing["Norm_Vorschlag"]
    except Exception as e:
        log(f"Fehler Spaltenbestimmung: {e}", level="ERROR")
        return

    try:
        txt = str(cell.String).strip()
        if not txt:
            sheet.getCellByPosition(norm_sug_col, row_index).String = ""
            return
        norm_dict, lemma_index = build_norm_index(NV_MASTER_PATH)
        suggestions_acc = []
        clauses = [c.strip() for c in re.split(r",", txt) if c.strip()]
        for cl in clauses:
            parts = [p.strip() for p in re.split(r"\s+", cl) if p.strip()]
            for p in parts:
                if p.lower() in STOPWORDS or re.fullmatch(r"\d+", p):
                    continue
                for sp in compound_split(p):
                    _, sugs, _ = map_term_with_indexes(sp, norm_dict, lemma_index)
                    suggestions_acc.extend(sugs)

        seen = set()
        ordered = []
        for s in suggestions_acc:
            if s not in seen:
                seen.add(s)
                ordered.append(s)
        sheet.getCellByPosition(norm_sug_col, row_index).String = " | ".join(ordered)

        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(CACHE, f, ensure_ascii=False, indent=2)

    except Exception as e:
        log(f"Fehler im Live-Handler: {e}", level="ERROR")

# ------------------------
# Batch-Durchlauf
# ------------------------
def run_mapper_macro():
    log("=== mapper_macro 1.5 gestartet ===", level="INFO")
    try:
        doc = XSCRIPTCONTEXT.getDocument()
        sheet = doc.CurrentController.ActiveSheet
        cursor = sheet.createCursor()
        cursor.gotoStartOfUsedArea(False)
        cursor.gotoEndOfUsedArea(True)
        dr = cursor.getRangeAddress()
    except Exception as e:
        log(f"Dokumentzugriff fehlgeschlagen: {e}", level="ERROR")
        return

    header_row, objekt_col, dr, existing = find_header_and_cols(sheet)
    if objekt_col is None:
        log("Spalte 'Objektbeschreibung' nicht gefunden.", level="ERROR")
        return
    if "Norm_Treffer" not in existing:
        last_col = dr.EndColumn + 1
        existing["Norm_Treffer"] = last_col
        sheet.getCellByPosition(last_col, header_row).String = "Norm_Treffer"
    if "Norm_Vorschlag" not in existing:
        last_col = dr.EndColumn + 2
        existing["Norm_Vorschlag"] = last_col
        sheet.getCellByPosition(last_col, header_row).String = "Norm_Vorschlag"
    if "Norm_ID" not in existing:
        last_col = dr.EndColumn + 3
        existing["Norm_ID"] = last_col
        sheet.getCellByPosition(last_col, header_row).String = "Norm_ID"

    norm_dict, lemma_index = build_norm_index(NV_MASTER_PATH)
    GREEN, YELLOW, RED = 0xADFF2F, 0xFFA500, 0xCC0000

    for r in range(header_row + 1, dr.EndRow + 1):
        try:
            cell = sheet.getCellByPosition(objekt_col, r)
            txt = str(cell.String).strip()
            if not txt:
                continue
            clauses = [c.strip() for c in re.split(r",", txt) if c.strip()]
            terms = []
            for cl in clauses:
                for p in [p.strip() for p in re.split(r"\s+", cl) if p.strip()]:
                    if p.lower() in STOPWORDS or re.fullmatch(r"\d+", p):
                        continue
                    terms.extend([sp.strip() for sp in compound_split(p) if sp.strip()])

            row_hits, row_sugs, row_ids = [], [], []
            any_unmapped = False
            for term in terms:
                hits, sugs, ids = map_term_with_indexes(term, norm_dict, lemma_index)
                row_hits.extend(hits)
                row_sugs.extend(sugs)
                row_ids.extend(ids)
                if not hits and not sugs:
                    any_unmapped = True

            def uniq(seq):
                seen = set()
                out = []
                for x in seq:
                    if x not in seen:
                        seen.add(x)
                        out.append(x)
                return out

            row_hits, row_sugs, row_ids = map(uniq, [row_hits, row_sugs, row_ids])
            sheet.getCellByPosition(existing["Norm_Treffer"], r).String = " | ".join(row_hits)
            sheet.getCellByPosition(existing["Norm_Vorschlag"], r).String = " | ".join(row_sugs)
            sheet.getCellByPosition(existing["Norm_ID"], r).String = " | ".join(row_ids)

            cell.CellBackColor = RED if any_unmapped else 0xFFFFFF
            sheet.getCellByPosition(existing["Norm_Treffer"], r).CellBackColor = GREEN if row_hits and not any_unmapped else 0xFFFFFF
            sheet.getCellByPosition(existing["Norm_Vorschlag"], r).CellBackColor = YELLOW if row_sugs else 0xFFFFFF

        except Exception as e:
            log(f"Fehler in Zeile {r}: {e}", level="ERROR")
            continue

    with open(CACHE_FILE, "w", encoding="utf-8") as f:
        json.dump(CACHE, f, ensure_ascii=False, indent=2)
    log("=== mapper_macro 1.5 fertig ===", level="INFO")

# ------------------------
# Export
# ------------------------
g_exportedScripts = (
    run_mapper_macro,
    on_objektbeschreibung_change
)
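The text helpers in this version need only the Python standard library, so their tokenising behaviour can be sanity-checked outside LibreOffice. A minimal standalone sketch (the sample strings below are invented for illustration, not taken from NV_MASTER):

# check_tokenizing.py - standalone sketch of the helpers defined above
import re

def normalize_text(s):
    # same normalisation as in mapper_macro_1.4.py
    if not s:
        return ""
    s = str(s).strip().lower()
    s = re.sub(r"[\(\)\[\]\"'\\;:\?!,\.]", "", s)
    s = re.sub(r"\s+", " ", s)
    return s

def compound_split(term):
    # capitalised sub-words first, then hyphen/whitespace split, as above
    if not term:
        return []
    parts = re.findall(r'[A-ZÄÖÜ][a-zäöü]+', term)
    if parts:
        return parts
    parts = [p for p in re.split(r'[-\s]+', term) if p]
    return parts or [term]

if __name__ == "__main__":
    print(normalize_text("  Holzkiste, (beschädigt)  "))   # -> "holzkiste beschädigt"
    print(compound_split("Holzkiste"))                      # -> ["Holzkiste"]
    print(compound_split("Vorratsdose mit Deckel"))         # -> ["Vorratsdose", "Deckel"]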
508  Mapper_Makro_Alte_Versionen/mapper_macro_1.5.py  Normal file
@@ -0,0 +1,508 @@
# -*- coding: utf-8 -*-
# mapper_macro 1.5 - korrigiert: Logging im Dokumentverzeichnis, stabile Button-Erstellung,
# keine Listener, optimiertes Mapping (ohne Listener-Teil)

import os
import re
import json
import datetime

# optionale Module (Pandas, Spacy, RapidFuzz)
try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except Exception:
    PANDAS_AVAILABLE = False

try:
    import spacy
    nlp = spacy.load("de_core_news_sm")
    SPACY_AVAILABLE = True
except Exception:
    SPACY_AVAILABLE = False
    nlp = None

try:
    from rapidfuzz import fuzz
    RAPIDFUZZ_AVAILABLE = True
except Exception:
    RAPIDFUZZ_AVAILABLE = False

from difflib import SequenceMatcher

# UNO (für Button/Paths)
try:
    import uno
except Exception:
    uno = None

# ------------------------
# Konfiguration (Fallback-BASE_DIR)
# ------------------------
BASE_DIR = os.path.expanduser("~/.config/libreoffice/4/user/Scripts/python/Vokabular_Abgleich_Makro")
NV_MASTER_FILENAME = "NV_MASTER.ods"
CACHE_FILENAME = "mapper_cache.json"
LOG_FILENAME = "mapper_macro.log"

STOPWORDS = {
    "mit", "ohne", "der", "die", "das", "ein", "eine", "und", "zu", "von", "im", "in", "auf", "an",
    "als", "bei", "für", "aus", "dem", "den", "des", "eines", "einer"
}
CONF_THRESHOLD = 0.82
FUZZY_CUTOFF = 0.88

# Per-document paths (initialized by set_paths_from_doc)
DOC_DIR = BASE_DIR
NV_MASTER_PATH = os.path.join(DOC_DIR, NV_MASTER_FILENAME)
CACHE_FILE = os.path.join(DOC_DIR, CACHE_FILENAME)
LOG_FILE = os.path.join(DOC_DIR, LOG_FILENAME)

# in-memory cache
try:
    if os.path.exists(CACHE_FILE):
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            CACHE = json.load(f)
    else:
        CACHE = {}
except Exception:
    CACHE = {}

# ------------------------
# Pfade im Dokument setzen
# ------------------------
def set_paths_from_doc(doc):
    global DOC_DIR, NV_MASTER_PATH, CACHE_FILE, LOG_FILE
    try:
        url = getattr(doc, "URL", "")
        if url and url.strip():
            # UNO liefert file:///...
            try:
                system_path = uno.fileUrlToSystemPath(url)
            except Exception:
                # fallback: try simple unquote
                from urllib.parse import unquote, urlparse
                parsed = urlparse(url)
                if parsed.scheme == "file":
                    system_path = unquote(parsed.path)
                else:
                    system_path = ""
            if system_path:
                d = os.path.dirname(system_path)
                if os.path.isdir(d):
                    DOC_DIR = d
    except Exception:
        DOC_DIR = BASE_DIR
    NV_MASTER_PATH = os.path.join(DOC_DIR, NV_MASTER_FILENAME)
    CACHE_FILE = os.path.join(DOC_DIR, CACHE_FILENAME)
    LOG_FILE = os.path.join(DOC_DIR, LOG_FILENAME)

# ------------------------
# Logging (Dokumentdir, robust)
# ------------------------
def log(msg, level="INFO"):
    ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{ts}] [{level}] {msg}\n"
    try:
        # ensure directory exists
        os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(line)
    except Exception:
        # absolute fallback: try writing into BASE_DIR
        try:
            fallback = os.path.join(BASE_DIR, LOG_FILENAME)
            os.makedirs(os.path.dirname(fallback), exist_ok=True)
            with open(fallback, "a", encoding="utf-8") as f:
                f.write(line)
        except Exception:
            # last resort: silent
            pass

# ------------------------
# Textvorbereitung & Helpers
# ------------------------
lemma_cache = {}

def normalize_text(s):
    if not s:
        return ""
    s = str(s).strip().lower()
    s = re.sub(r"[\(\)\[\]\"'\\;:\?!,\.]", "", s)
    s = re.sub(r"\s+", " ", s)
    return s

def lemmatize_term(term):
    term_norm = normalize_text(term)
    if term_norm in lemma_cache:
        return lemma_cache[term_norm]
    if SPACY_AVAILABLE and nlp:
        try:
            doc = nlp(term_norm)
            lemma = " ".join([t.lemma_ for t in doc])
        except Exception:
            lemma = term_norm
    else:
        lemma = term_norm
    lemma_cache[term_norm] = lemma
    return lemma

def compound_split(term):
    if not term:
        return []
    parts = re.findall(r'[A-ZÄÖÜ][a-zäöü]+', term)
    if parts:
        return parts
    parts = [p for p in re.split(r'[-\s]+', term) if p]
    return parts or [term]

# ------------------------
# NV_MASTER indexieren
# ------------------------
def build_norm_index(nv_path):
    norm_dict = {}
    lemma_index = {}
    if not PANDAS_AVAILABLE:
        log("Pandas nicht verfügbar, NV_MASTER kann nicht gelesen werden.", level="ERROR")
        return norm_dict, lemma_index
    try:
        sheets = pd.read_excel(nv_path, sheet_name=None, engine="odf")
    except Exception as e:
        log(f"Fehler beim Einlesen von NV_MASTER: {e}", level="ERROR")
        return norm_dict, lemma_index

    for sheet_name, df in sheets.items():
        if str(sheet_name).strip().lower() == "master":
            continue
        df = df.fillna("")
        cols = [str(c).strip().lower() for c in df.columns]
        # find id/word columns with fallback
        id_col = None
        word_col = None
        for i, c in enumerate(cols):
            if "id" in c:
                id_col = df.columns[i]
            if "wort" in c or "vokabel" in c:
                word_col = df.columns[i]
        if word_col is None and len(df.columns) >= 1:
            word_col = df.columns[-1]
        if id_col is None and len(df.columns) >= 1:
            id_col = df.columns[0]

        current_parent_id = None
        for _, row in df.iterrows():
            id_val = str(row[id_col]).strip() if id_col in df.columns else ""
            word_val = str(row[word_col]).strip() if word_col in df.columns else ""
            if id_val:
                current_parent_id = id_val
            if not word_val:
                continue
            norm_name = normalize_text(word_val)
            lemma = lemmatize_term(word_val)
            entry = {"Name": word_val.strip(), "ID": current_parent_id or "", "Sheet": sheet_name}
            norm_dict.setdefault(norm_name, []).append(entry)
            lemma_index.setdefault(lemma, []).append(entry)

    log(f"NV_MASTER geladen. Begriffe: {sum(len(v) for v in norm_dict.values())}", level="INFO")
    return norm_dict, lemma_index

# ------------------------
# Fuzzy Matching
# ------------------------
def fuzzy_score(a, b):
    a = (a or "").lower()
    b = (b or "").lower()
    if RAPIDFUZZ_AVAILABLE:
        try:
            return fuzz.token_sort_ratio(a, b) / 100.0
        except Exception:
            return 0.0
    else:
        return SequenceMatcher(None, a, b).ratio()

def get_suggestions(term_lemma, norm_dict, lemma_index, threshold=FUZZY_CUTOFF, max_sugs=6):
    candidates = []
    term_norm = term_lemma or ""
    for key_lemma, entries in lemma_index.items():
        if not key_lemma:
            continue
        score = fuzzy_score(term_norm, key_lemma)
        if key_lemma.startswith(term_norm):
            score = min(score + 0.08, 1.0)
        if score >= threshold:
            for e in entries:
                candidates.append((score, e["Name"], e["ID"]))
    # also check normalized names
    for norm_key, entries in norm_dict.items():
        score = fuzzy_score(term_norm, norm_key)
        if norm_key.startswith(term_norm):
            score = min(score + 0.08, 1.0)
        if score >= threshold:
            for e in entries:
                candidates.append((score, e["Name"], e["ID"]))
    # sort & dedupe
    candidates.sort(key=lambda t: t[0], reverse=True)
    seen = set()
    out = []
    for score, name, id_ in candidates:
        key = (name, id_)
        if key in seen:
            continue
        seen.add(key)
        if id_:
            out.append(f"{name} ({id_})")
        else:
            out.append(name)
        if len(out) >= max_sugs:
            break
    return out

# ------------------------
# Mapping mit Cache
# ------------------------
def map_term(term, norm_dict, lemma_index):
    term_norm = normalize_text(term)
    term_lemma = lemmatize_term(term)
    if term_lemma in CACHE:
        return CACHE[term_lemma]

    hits = []
    suggestions = []
    ids = []

    # exact
    if term_norm in norm_dict:
        for e in norm_dict[term_norm]:
            hits.append(e["Name"])
            if e["ID"]:
                ids.append(e["ID"])

    # lemma
    if not hits and term_lemma in lemma_index:
        for e in lemma_index[term_lemma]:
            hits.append(e["Name"])
            if e["ID"]:
                ids.append(e["ID"])

    # suggestions only if no hit
    if not hits:
        suggestions = get_suggestions(term_lemma, norm_dict, lemma_index)

    # remove suggestions that are equal/contain hits
    suggestions = [s for s in suggestions if not any(h.lower() in s.lower() for h in hits)]

    result = {"hits": hits, "suggestions": suggestions, "ids": ids}
    CACHE[term_lemma] = result
    return result

# ------------------------
# Button erstellen (sicher)
# ------------------------
def add_macro_button(sheet):
    try:
        doc = XSCRIPTCONTEXT.getDocument()
    except Exception:
        log("add_macro_button: kein Dokument-Kontext", level="WARN")
        return
    try:
        draw_page = sheet.DrawPage
        # avoid duplicate
        for shape in draw_page:
            try:
                if getattr(shape, "Name", "") == "MapperStartButton":
                    return
            except Exception:
                continue

        # create shape and button model
        shape = doc.createInstance("com.sun.star.drawing.ControlShape")
        shape.Name = "MapperStartButton"
        shape.Position = uno.createUnoStruct("com.sun.star.awt.Point")
        shape.Position.X = 1000
        shape.Position.Y = 200
        shape.Size = uno.createUnoStruct("com.sun.star.awt.Size")
        shape.Size.Width = 3000
        shape.Size.Height = 1000

        button_model = doc.createInstance("com.sun.star.form.component.CommandButton")
        button_model.Label = "Start Mapping"
        button_model.HelpText = "Startet das Mapping (run_mapper_macro)"
        # assign macro via ActionCommand is not enough; user must link in UI; we add the control and label

        shape.Control = button_model
        draw_page.add(shape)
        log("Button 'MapperStartButton' erstellt.", level="INFO")
    except Exception as e:
        log(f"add_macro_button Fehler: {e}", level="ERROR")

# ------------------------
# Hauptlauf (ohne Listener)
# ------------------------
def run_mapper_macro():
    try:
        doc = XSCRIPTCONTEXT.getDocument()
        set_paths_from_doc(doc)
        log("=== mapper_macro gestartet ===", level="INFO")
        sheet = doc.CurrentController.ActiveSheet
        add_macro_button(sheet)

        # used area
        cursor = sheet.createCursor()
        cursor.gotoStartOfUsedArea(False)
        cursor.gotoEndOfUsedArea(True)
        dr = cursor.getRangeAddress()

        # find header and objekt col
        header_row = None
        objekt_col = None
        for r in range(0, min(10, dr.EndRow + 1)):
            for c in range(0, dr.EndColumn + 1):
                try:
                    val = str(sheet.getCellByPosition(c, r).String).strip().lower()
                except Exception:
                    val = ""
                if val == "objektbeschreibung":
                    header_row = r
                    objekt_col = c
                    break
            if objekt_col is not None:
                break

        if objekt_col is None:
            log("run_mapper_macro: 'Objektbeschreibung' Header nicht gefunden.", level="ERROR")
            return

        # ensure result cols
        existing = {}
        last_col = dr.EndColumn
        for c in range(0, dr.EndColumn + 1):
            try:
                h = str(sheet.getCellByPosition(c, header_row).String).strip()
            except Exception:
                h = ""
            if h == "Norm_Treffer":
                existing["Norm_Treffer"] = c
            if h == "Norm_Vorschlag":
                existing["Norm_Vorschlag"] = c
            if h == "Norm_ID":
                existing["Norm_ID"] = c

        if "Norm_Treffer" not in existing:
            last_col += 1
            existing["Norm_Treffer"] = last_col
            sheet.getCellByPosition(last_col, header_row).String = "Norm_Treffer"
        if "Norm_Vorschlag" not in existing:
            last_col += 1
            existing["Norm_Vorschlag"] = last_col
            sheet.getCellByPosition(last_col, header_row).String = "Norm_Vorschlag"
        if "Norm_ID" not in existing:
            last_col += 1
            existing["Norm_ID"] = last_col
            sheet.getCellByPosition(last_col, header_row).String = "Norm_ID"

        norm_tr_col = existing["Norm_Treffer"]
        norm_sug_col = existing["Norm_Vorschlag"]
        norm_id_col = existing["Norm_ID"]

        # build index
        norm_dict, lemma_index = build_norm_index(NV_MASTER_PATH)
        if not norm_dict and not lemma_index:
            log("run_mapper_macro: NV_MASTER leer oder nicht lesbar.", level="ERROR")
            return

        GREEN, YELLOW, RED = 0xADFF2F, 0xFFFF66, 0xFF9999
        rows_processed = 0

        for r in range(header_row + 1, dr.EndRow + 1):
            try:
                cell = sheet.getCellByPosition(objekt_col, r)
                txt = str(cell.String).strip()
                if not txt:
                    continue

                # phrase-first: try entire cleaned phrase (remove stopwords)
                tokens = [t.strip() for t in re.split(r'\s+', normalize_text(txt)) if t and t not in STOPWORDS]
                phrase = " ".join(tokens).strip()
                terms = []
                if phrase:
                    # first try phrase as whole
                    mapped_phrase = map_term(phrase, norm_dict, lemma_index)
                    if mapped_phrase["hits"] or mapped_phrase["suggestions"]:
                        # use phrase result (flatten hits+suggestions for output)
                        row_hits = mapped_phrase["hits"]
                        row_sugs = mapped_phrase["suggestions"]
                        row_ids = mapped_phrase["ids"]
                        any_unmapped = False if (row_hits or row_sugs) else True
                    else:
                        # fallback to token/compound processing
                        for p in [p for p in re.split(r'[,\s]+', txt) if p.strip()]:
                            if p.lower() in STOPWORDS or re.fullmatch(r'\d+', p):
                                continue
                            for sp in compound_split(p):
                                if sp and sp.strip():
                                    terms.append(sp.strip())
                        row_hits = []
                        row_sugs = []
                        row_ids = []
                        any_unmapped = False
                        for term in terms:
                            mapped = map_term(term, norm_dict, lemma_index)
                            hits, sugs, ids = mapped["hits"], mapped["suggestions"], mapped["ids"]
                            if hits:
                                row_hits.extend(hits)
                            if sugs:
                                row_sugs.extend(sugs)
                            if ids:
                                row_ids.extend(ids)
                            if not hits and not sugs:
                                any_unmapped = True
                else:
                    row_hits, row_sugs, row_ids = [], [], []
                    any_unmapped = True

                # dedupe preserving order
                def uniq(seq):
                    seen = set()
                    out = []
                    for x in seq:
                        if x not in seen:
                            seen.add(x)
                            out.append(x)
                    return out

                row_hits = uniq(row_hits)
                row_sugs = uniq(row_sugs)
                row_ids = uniq(row_ids)

                # write
                sheet.getCellByPosition(norm_tr_col, r).String = " | ".join(row_hits)
                sheet.getCellByPosition(norm_sug_col, r).String = " | ".join(row_sugs)
                sheet.getCellByPosition(norm_id_col, r).String = " | ".join(row_ids)

                cell.CellBackColor = RED if any_unmapped else 0xFFFFFF
                sheet.getCellByPosition(norm_tr_col, r).CellBackColor = GREEN if row_hits else 0xFFFFFF
                sheet.getCellByPosition(norm_sug_col, r).CellBackColor = YELLOW if row_sugs else 0xFFFFFF

                rows_processed += 1
            except Exception as e:
                log(f"Fehler in Zeile {r}: {e}", level="ERROR")
                continue

        # persist cache file to DOC_DIR
        try:
            with open(CACHE_FILE, "w", encoding="utf-8") as f:
                json.dump(CACHE, f, ensure_ascii=False, indent=2)
        except Exception as e:
            log(f"Cache speichern fehlgeschlagen: {e}", level="WARN")

        log(f"=== mapper_macro fertig. Zeilen verarbeitet: {rows_processed} ===", level="INFO")
    except Exception as e:
        # top-level safety
        try:
            log(f"run_mapper_macro: Unhandled exception: {e}", level="ERROR")
        except Exception:
            pass

# ------------------------
# Export
# ------------------------
g_exportedScripts = (run_mapper_macro,)
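Compared with 1.4, this version scores suggestions with token_sort_ratio instead of token_set_ratio and raises the cutoffs (CONF_THRESHOLD 0.82, FUZZY_CUTOFF 0.88), so borderline pairs that still passed the 0.75 threshold in 1.4 can drop out here. A small sketch of the difflib fallback scorer used when RapidFuzz is not installed (the word pairs are invented examples):

# score_fallback.py - sketch of the fallback scorer (else-branch of fuzzy_score above)
from difflib import SequenceMatcher

def fallback_score(a, b):
    # case-insensitive similarity in [0, 1], as in fuzzy_score() without RapidFuzz
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

pairs = [("holzkiste", "holzkisten"), ("dose", "vorratsdose")]
for a, b in pairs:
    s = fallback_score(a, b)
    print(f"{a!r} vs {b!r}: {s:.2f}  passes FUZZY_CUTOFF 0.88: {s >= 0.88}")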
343  Mapper_Makro_Alte_Versionen/mapper_macro_2.0.py  Normal file
@@ -0,0 +1,343 @@
# -*- coding: utf-8 -*-
"""
LibreOffice Calc Makro: NV_MASTER-Abgleich (verbessertes semantisches Matching)
Speicherort: /home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro/mapper_macro.py
"""

import os
import re
import json
import traceback

# ------------------------------------------------------------
# LIBRARIES & MODELS
# ------------------------------------------------------------
try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except Exception:
    PANDAS_AVAILABLE = False

try:
    import spacy
    # Verwende das mittlere Modell für semantische Ähnlichkeit
    nlp = spacy.load("de_core_news_md")
    SPACY_AVAILABLE = True
except Exception:
    SPACY_AVAILABLE = False
    nlp = None

try:
    from rapidfuzz import fuzz
    RAPIDFUZZ_AVAILABLE = True
except Exception:
    RAPIDFUZZ_AVAILABLE = False
    from difflib import SequenceMatcher

# ------------------------------------------------------------
# KONFIGURATION
# ------------------------------------------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
NV_MASTER_PATH = os.path.join(BASE_DIR, "NV_MASTER.ods")
LOG_FILE = os.path.join(BASE_DIR, "mapper_macro.log")
CACHE_FILE = os.path.join(BASE_DIR, "mapper_cache.json")

STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
CONF_THRESHOLD = 0.70  # etwas großzügiger für semantisches Matching

# ------------------------------------------------------------
# LOGGING
# ------------------------------------------------------------
def log(msg):
    """Schreibt technische Logs ins Makroverzeichnis."""
    try:
        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(msg.strip() + "\n")
    except Exception:
        pass

log("Makro gestartet")

# ------------------------------------------------------------
# CACHE
# ------------------------------------------------------------
try:
    if os.path.exists(CACHE_FILE):
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            CACHE = json.load(f)
    else:
        CACHE = {}
except Exception:
    CACHE = {}

# ------------------------------------------------------------
# TEXTNORMALISIERUNG & LEMMATISIERUNG
# ------------------------------------------------------------
def normalize_text(s):
    if not s:
        return ""
    s = str(s).strip().lower()
    s = re.sub(r"[\(\)\[\]\"'\\;:\?!,\.]", "", s)
    s = re.sub(r"\s+", " ", s)
    return s

lemma_cache = {}
def lemmatize_term(term):
    t = normalize_text(term)
    if t in lemma_cache:
        return lemma_cache[t]
    if SPACY_AVAILABLE and nlp:
        try:
            doc = nlp(t)
            lemma = " ".join([token.lemma_ for token in doc])
        except Exception:
            lemma = t
    else:
        lemma = t
    lemma_cache[t] = lemma
    return lemma

# ------------------------------------------------------------
# NV_MASTER LADEN
# ------------------------------------------------------------
def build_norm_index(nv_path):
    norm_dict = {}
    lemma_index = {}

    if not PANDAS_AVAILABLE:
        log("Pandas nicht verfügbar – NV_MASTER kann nicht geladen werden.")
        return norm_dict, lemma_index

    try:
        sheets = pd.read_excel(nv_path, sheet_name=None, engine="odf")
    except Exception as e:
        log(f"Fehler beim Laden von NV_MASTER: {e}")
        return norm_dict, lemma_index

    for sheet_name, df in sheets.items():
        if str(sheet_name).strip().lower() == "master":
            continue
        df = df.fillna("")
        cols = [str(c).strip().lower() for c in df.columns]
        id_col = next((df.columns[i] for i, c in enumerate(cols) if "id" in c), df.columns[0])
        word_col = next((df.columns[i] for i, c in enumerate(cols) if "wort" in c or "vokabel" in c), df.columns[-1])

        current_parent_id = None
        for _, row in df.iterrows():
            id_val = str(row[id_col]).strip()
            word_val = str(row[word_col]).strip()
            if id_val:
                current_parent_id = id_val
            if not word_val:
                continue
            norm_name = normalize_text(word_val)
            lemma = lemmatize_term(word_val)
            entry = {"Name": word_val, "ID": current_parent_id or "", "Sheet": sheet_name}
            norm_dict.setdefault(norm_name, []).append(entry)
            lemma_index.setdefault(lemma, []).append(entry)

    log(f"NV_MASTER geladen: {sum(len(v) for v in norm_dict.values())} Begriffe.")
    return norm_dict, lemma_index

# ------------------------------------------------------------
# SCORING: FUZZY + SEMANTISCH
# ------------------------------------------------------------
def fuzzy_score(a, b):
    if RAPIDFUZZ_AVAILABLE:
        try:
            return fuzz.token_set_ratio(a, b) / 100.0
        except Exception:
            return 0.0
    else:
        return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def semantic_similarity(a, b):
    if not SPACY_AVAILABLE or not hasattr(nlp.vocab, "vectors"):
        return 0.0
    try:
        doc_a, doc_b = nlp(a), nlp(b)
        if doc_a.vector_norm and doc_b.vector_norm:
            return float(doc_a.similarity(doc_b))
        return 0.0
    except Exception:
        return 0.0

def combined_score(a, b):
    sf = fuzzy_score(a, b)
    ss = semantic_similarity(a, b)
    return max(sf, ss)

# ------------------------------------------------------------
# MATCHING & VORSCHLÄGE
# ------------------------------------------------------------
def get_suggestions_for_term(term_lemma, norm_dict, lemma_index, top_n=3, threshold=CONF_THRESHOLD):
    candidates = []
    for key_lemma, entries in lemma_index.items():
        score = combined_score(term_lemma, key_lemma)
        if key_lemma.startswith(term_lemma):
            score = min(score + 0.05, 1.0)
        if score >= threshold:
            for e in entries:
                candidates.append((score, e["Name"], e["ID"]))
    for norm_key, entries in norm_dict.items():
        score = combined_score(term_lemma, norm_key)
        if norm_key.startswith(term_lemma):
            score = min(score + 0.05, 1.0)
        if score >= threshold:
            for e in entries:
                candidates.append((score, e["Name"], e["ID"]))
    candidates.sort(key=lambda x: x[0], reverse=True)
    seen, results = set(), []
    for score, name, id_ in candidates:
        key = (name.lower(), id_.lower() if id_ else "")
        if key in seen:
            continue
        seen.add(key)
        results.append({"score": score, "name": name, "id": id_})
        if len(results) >= top_n:
            break
    return [f'{r["name"]} ({r["id"]})' if r["id"] else r["name"] for r in results]

def map_term_with_indexes(term, norm_dict, lemma_index):
    term_norm = normalize_text(term)
    term_lemma = lemmatize_term(term)

    if term_lemma in CACHE:
        return CACHE[term_lemma]["hits"], CACHE[term_lemma]["suggestions"], CACHE[term_lemma]["ids"]

    hits, suggestions, ids = [], [], []

    if term_norm in norm_dict:
        for e in norm_dict[term_norm]:
            hits.append(e["Name"])
            if e["ID"]:
                ids.append(e["ID"])

    if not hits and term_lemma in lemma_index:
        for e in lemma_index[term_lemma]:
            hits.append(e["Name"])
            if e["ID"]:
                ids.append(e["ID"])

    suggs = get_suggestions_for_term(term_lemma, norm_dict, lemma_index, top_n=3, threshold=CONF_THRESHOLD)
    filtered_suggs = []
    for s in suggs:
        s_clean = normalize_text(s.split(" (")[0])
        if s_clean not in [normalize_text(h) for h in hits]:
            filtered_suggs.append(s)
    suggestions = filtered_suggs

    def uniq(seq):
        seen = set()
        out = []
        for x in seq:
            if x not in seen:
                seen.add(x)
                out.append(x)
        return out

    hits, suggestions, ids = uniq(hits), uniq(suggestions), uniq(ids)
    CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}

    log(f"TERM: {term} | HITS: {hits} | SUGGS: {suggestions}")
    return hits, suggestions, ids

# ------------------------------------------------------------
# HAUPTMAKRO
# ------------------------------------------------------------
def run_mapper_macro():
    try:
        doc = XSCRIPTCONTEXT.getDocument()
        sheet = doc.CurrentController.ActiveSheet
    except Exception as e:
        log(f"Fehler beim Zugriff auf Dokument: {e}")
        return

    norm_dict, lemma_index = build_norm_index(NV_MASTER_PATH)
    if not norm_dict:
        log("Fehler: NV_MASTER leer oder nicht gefunden.")
        return

    try:
        cursor = sheet.createCursor()
        cursor.gotoStartOfUsedArea(False)
        cursor.gotoEndOfUsedArea(True)
        used = cursor.getRangeAddress()
    except Exception as e:
        log(f"Cursor-Fehler: {e}")
        return

    header_row = 0
    objekt_col = None
    for c in range(0, used.EndColumn + 1):
        val = str(sheet.getCellByPosition(c, header_row).String).strip().lower()
        if val == "objektbeschreibung":
            objekt_col = c
            break
    if objekt_col is None:
        log("Keine Spalte 'Objektbeschreibung' gefunden.")
        return

    existing = {}
    for c in range(0, used.EndColumn + 1):
        h = str(sheet.getCellByPosition(c, header_row).String).strip()
        if h == "Norm_Treffer": existing["Norm_Treffer"] = c
        if h == "Norm_Vorschlag": existing["Norm_Vorschlag"] = c
        if h == "Norm_ID": existing["Norm_ID"] = c

    last_col = used.EndColumn
    for name in ["Norm_Treffer", "Norm_Vorschlag", "Norm_ID"]:
        if name not in existing:
            last_col += 1
            existing[name] = last_col
            sheet.getCellByPosition(last_col, header_row).String = name

    GREEN, YELLOW, RED = 0xADFF2F, 0xFFD700, 0xCC0000
    norm_tr_col, norm_sug_col, norm_id_col = existing["Norm_Treffer"], existing["Norm_Vorschlag"], existing["Norm_ID"]

    rows = 0
    for r in range(header_row + 1, used.EndRow + 1):
        txt = str(sheet.getCellByPosition(objekt_col, r).String).strip()
        if not txt:
            continue
        terms = [t.strip() for t in re.split(r",|\s+", txt) if t.strip() and t.lower() not in STOPWORDS]
        row_hits, row_sugs, row_ids, any_unmapped = [], [], [], False
        for term in terms:
            hits, sugs, ids = map_term_with_indexes(term, norm_dict, lemma_index)
            if hits: row_hits.extend(hits)
            if sugs: row_sugs.extend(sugs)
            if ids: row_ids.extend(ids)
            if not hits and not sugs: any_unmapped = True

        def uniq(seq):
            seen = set()
            out = []
            for x in seq:
                if x not in seen:
                    seen.add(x)
                    out.append(x)
            return out

        row_hits, row_sugs, row_ids = uniq(row_hits), uniq(row_sugs), uniq(row_ids)
        sheet.getCellByPosition(norm_tr_col, r).String = " | ".join(row_hits)
        sheet.getCellByPosition(norm_sug_col, r).String = " | ".join(row_sugs)
        sheet.getCellByPosition(norm_id_col, r).String = " | ".join(row_ids)

        obj_cell = sheet.getCellByPosition(objekt_col, r)
        sug_cell = sheet.getCellByPosition(norm_sug_col, r)
        tr_cell = sheet.getCellByPosition(norm_tr_col, r)

        if any_unmapped:
            obj_cell.CellBackColor = RED
        elif row_hits:
            tr_cell.CellBackColor = GREEN
        if row_sugs:
            sug_cell.CellBackColor = YELLOW

        rows += 1

    with open(CACHE_FILE, "w", encoding="utf-8") as f:
        json.dump(CACHE, f, ensure_ascii=False, indent=2)
    log(f"Makro abgeschlossen, {rows} Zeilen verarbeitet.")

g_exportedScripts = (run_mapper_macro,)
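All three archived versions persist the same cache layout to mapper_cache.json: one entry per lemmatised term with its hits, suggestions and IDs. A minimal sketch of reading such a cache back for inspection (the path and the slice size are illustrative assumptions, not part of the macros):

# inspect_cache.py - sketch: read the mapper cache written by the macros above
import json

CACHE_FILE = "mapper_cache.json"  # assumed to sit next to the document or macro

with open(CACHE_FILE, "r", encoding="utf-8") as f:
    cache = json.load(f)

# each entry looks like: {"hits": [...], "suggestions": [...], "ids": [...]}
for term, entry in list(cache.items())[:10]:
    print(term, "->", entry.get("hits", []), entry.get("ids", []))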
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 # LibreOffice Calc macro: NV_MASTER-Abgleich, Pandas+odf, Cache, Farben
-# Pfade: BASE_DIR muss auf das Verzeichnis zeigen, in dem NV_MASTER.ods + Makro liegen.
-# Speichern: /home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro/mapper_macro.py
+# Speicherort: /home/jarnold/.config/libreoffice/4/user/Scripts/python/Vokabular_Abgleich_Makro/mapper_macro_2.1.py
 
 import os
 import re
@@ -9,7 +8,6 @@ import json
 import traceback
 
 # UNO-Context wird zur Laufzeit zur Verfügung gestellt (XSCRIPTCONTEXT)
-# Third-party libs: pandas, odfpy, optional: spacy, rapidfuzz
 try:
     import pandas as pd
     PANDAS_AVAILABLE = True
@@ -34,10 +32,10 @@ except Exception:
 # ------------------------
 # Konfiguration
 # ------------------------
-BASE_DIR = "/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro"
+BASE_DIR = "/home/jarnold/.config/libreoffice/4/user/Scripts/python/Vokabular_Abgleich_Makro"
 NV_MASTER_PATH = os.path.join(BASE_DIR, "NV_MASTER.ods")
-LOG_FILE = os.path.join(BASE_DIR, "mapper_macro.log")
-CACHE_FILE = os.path.join(BASE_DIR, "mapper_cache.json")
+LOG_FILE = os.path.join(BASE_DIR, "mapper_macro_2.1.log")
+CACHE_FILE = os.path.join(BASE_DIR, "mapper_cache_2.1.json")
 
 STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
 CONF_THRESHOLD = 0.75  # Basis-Schwelle für Vorschläge
@@ -110,10 +108,8 @@ def build_norm_index(nv_path):
     for sheet_name, df in sheets.items():
         if str(sheet_name).strip().lower() == "master":
             continue
-        # normalize columns names to find ID and Wort columns
-        df = df.fillna("")
+        df = df.fillna("")  # leere Zellen als ""
         cols = [str(c).strip().lower() for c in df.columns]
-        # try to find columns
         id_col = None
         word_col = None
         for i, c in enumerate(cols):
@@ -121,7 +117,6 @@ def build_norm_index(nv_path):
                 id_col = df.columns[i]
             if "wort" in c or "vokabel" in c:
                 word_col = df.columns[i]
-        # fallback: if not found, try first/last
         if word_col is None and len(df.columns) >= 1:
             word_col = df.columns[-1]
         if id_col is None and len(df.columns) >= 1:
@@ -131,18 +126,14 @@ def build_norm_index(nv_path):
         for _, row in df.iterrows():
             id_val = str(row[id_col]).strip() if id_col in df.columns else ""
             word_val = str(row[word_col]).strip() if word_col in df.columns else ""
-            # if row defines an ID, set as current parent
             if id_val:
                 current_parent_id = id_val
-            # skip empty word cells
             if not word_val:
                 continue
             norm_name = normalize_text(word_val)
             lemma = lemmatize_term(word_val)
             entry = {"Name": word_val.strip(), "ID": current_parent_id or "", "Sheet": sheet_name}
-            # add to norm_dict by normalized name (exact matching)
             norm_dict.setdefault(norm_name, []).append(entry)
-            # add to lemma_index
             lemma_index.setdefault(lemma, []).append(entry)
 
     log(f"NV_MASTER geladen ({NV_MASTER_PATH}). Begriffe: {sum(len(v) for v in norm_dict.values())}")
@@ -163,10 +154,8 @@ def fuzzy_score(a, b):
         except Exception:
             return 0.0
 
-def get_suggestions_for_term(term_lemma, norm_dict, lemma_index, top_n=None, threshold=CONF_THRESHOLD):
-    # collect candidates from lemma_index keys and norm_dict keys
+def get_suggestions_for_term(term_lemma, norm_dict, lemma_index, threshold=CONF_THRESHOLD):
     candidates = []
-    # iterate over lemma_index keys for candidate names
     for key_lemma, entries in lemma_index.items():
         score = fuzzy_score(term_lemma, key_lemma)
         if key_lemma.startswith(term_lemma):
@@ -174,7 +163,6 @@ def get_suggestions_for_term(term_lemma, norm_dict, lemma_index, top_n=None, threshold=CONF_THRESHOLD):
         if score >= threshold:
             for e in entries:
                 candidates.append((score, e["Name"], e["ID"]))
-    # also check norm_dict keys (exact-normalized names) as additional candidates
     for norm_key, entries in norm_dict.items():
         score = fuzzy_score(term_lemma, norm_key)
         if norm_key.startswith(term_lemma):
@@ -182,9 +170,7 @@ def get_suggestions_for_term(term_lemma, norm_dict, lemma_index, top_n=None, threshold=CONF_THRESHOLD):
         if score >= threshold:
             for e in entries:
                 candidates.append((score, e["Name"], e["ID"]))
-    # sort by score descending
     candidates.sort(key=lambda t: t[0], reverse=True)
-    # unique by (Name, ID) preserve score order
     seen = set()
     results = []
     for score, name, id_ in candidates:
@@ -193,40 +179,28 @@ def get_suggestions_for_term(term_lemma, norm_dict, lemma_index, top_n=None, threshold=CONF_THRESHOLD):
             continue
         seen.add(key)
         results.append({"score": score, "name": name, "id": id_})
-    # return all candidates (no limit) as "Name (ID)"
     return [f'{r["name"]} ({r["id"]})' if r["id"] else r["name"] for r in results]
 
 def map_term_with_indexes(term, norm_dict, lemma_index):
     term_norm = normalize_text(term)
     term_lemma = lemmatize_term(term)
-    # cache lookup
     if term_lemma in CACHE:
-        return CACHE[term_lemma]["hits"], CACHE[term_lemma]["suggestions"], CACHE[term_lemma]["ids"]
+        return CACHE[term_lemma]["hits"], CACHE[term_lemma]["suggestions"]
 
     hits = []
     suggestions = []
-    ids = []
 
-    # 1) exact normalized name match
     if term_norm in norm_dict:
         for e in norm_dict[term_norm]:
|
||||||
hits.append(e["Name"])
|
hits.append(f'{e["Name"]} ({e["ID"]})' if e["ID"] else e["Name"])
|
||||||
if e["ID"]:
|
|
||||||
ids.append(e["ID"])
|
|
||||||
|
|
||||||
# 2) lemma match (if not already hits)
|
|
||||||
if not hits and term_lemma in lemma_index:
|
if not hits and term_lemma in lemma_index:
|
||||||
for e in lemma_index[term_lemma]:
|
for e in lemma_index[term_lemma]:
|
||||||
hits.append(e["Name"])
|
hits.append(f'{e["Name"]} ({e["ID"]})' if e["ID"] else e["Name"])
|
||||||
if e["ID"]:
|
|
||||||
ids.append(e["ID"])
|
|
||||||
|
|
||||||
# 3) suggestions via fuzzy (always compute even if hits exist, but suggestions empty if exact)
|
if not hits:
|
||||||
suggs = get_suggestions_for_term(term_lemma, norm_dict, lemma_index, top_n=None, threshold=CONF_THRESHOLD)
|
suggestions = get_suggestions_for_term(term_lemma, norm_dict, lemma_index)
|
||||||
# If there are exact hits, we still may present suggestions (user wanted unlimited), but suggestions are secondary
|
|
||||||
suggestions = suggs
|
|
||||||
|
|
||||||
# deduplicate lists preserving order
|
|
||||||
def unique_preserve(seq):
|
def unique_preserve(seq):
|
||||||
seen = set()
|
seen = set()
|
||||||
out = []
|
out = []
|
||||||
@ -238,18 +212,15 @@ def map_term_with_indexes(term, norm_dict, lemma_index):
|
|||||||
|
|
||||||
hits = unique_preserve(hits)
|
hits = unique_preserve(hits)
|
||||||
suggestions = unique_preserve(suggestions)
|
suggestions = unique_preserve(suggestions)
|
||||||
ids = unique_preserve(ids)
|
|
||||||
|
|
||||||
# cache result
|
CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions}
|
||||||
CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}
|
return hits, suggestions
|
||||||
return hits, suggestions, ids
|
|
||||||
|
|
||||||
# ------------------------
|
# ------------------------
|
||||||
# Haupt-Makro
|
# Haupt-Makro
|
||||||
# ------------------------
|
# ------------------------
|
||||||
def run_mapper_macro():
|
def run_mapper_macro():
|
||||||
try:
|
try:
|
||||||
# UNO doc/sheet
|
|
||||||
doc = XSCRIPTCONTEXT.getDocument()
|
doc = XSCRIPTCONTEXT.getDocument()
|
||||||
sheet = doc.CurrentController.ActiveSheet
|
sheet = doc.CurrentController.ActiveSheet
|
||||||
cursor = sheet.createCursor()
|
cursor = sheet.createCursor()
|
||||||
@ -260,7 +231,6 @@ def run_mapper_macro():
|
|||||||
log("Fehler: konnte Dokument/Sheet nicht öffnen: " + str(e))
|
log("Fehler: konnte Dokument/Sheet nicht öffnen: " + str(e))
|
||||||
return
|
return
|
||||||
|
|
||||||
# find header row and Objektbeschreibung column (search first 5 rows)
|
|
||||||
header_row = None
|
header_row = None
|
||||||
objekt_col = None
|
objekt_col = None
|
||||||
max_col = data_range.EndColumn
|
max_col = data_range.EndColumn
|
||||||
@ -281,7 +251,7 @@ def run_mapper_macro():
|
|||||||
log("Spalte 'Objektbeschreibung' nicht gefunden. Abbruch.")
|
log("Spalte 'Objektbeschreibung' nicht gefunden. Abbruch.")
|
||||||
return
|
return
|
||||||
|
|
||||||
# determine or create result columns: search if exist anywhere; otherwise append at right end
|
# Prüfen/Anlegen der Ergebnis-Spalten
|
||||||
existing = {}
|
existing = {}
|
||||||
for c in range(0, data_range.EndColumn+1):
|
for c in range(0, data_range.EndColumn+1):
|
||||||
try:
|
try:
|
||||||
@ -292,59 +262,38 @@ def run_mapper_macro():
|
|||||||
existing["Norm_Treffer"] = c
|
existing["Norm_Treffer"] = c
|
||||||
if h == "Norm_Vorschlag":
|
if h == "Norm_Vorschlag":
|
||||||
existing["Norm_Vorschlag"] = c
|
existing["Norm_Vorschlag"] = c
|
||||||
if h == "Norm_ID":
|
|
||||||
existing["Norm_ID"] = c
|
|
||||||
|
|
||||||
# append columns at right end if missing
|
|
||||||
last_col = data_range.EndColumn
|
last_col = data_range.EndColumn
|
||||||
if "Norm_Treffer" not in existing:
|
if "Norm_Treffer" not in existing:
|
||||||
last_col += 1
|
last_col += 1
|
||||||
existing["Norm_Treffer"] = last_col
|
existing["Norm_Treffer"] = last_col
|
||||||
try:
|
|
||||||
sheet.getCellByPosition(last_col, header_row).String = "Norm_Treffer"
|
sheet.getCellByPosition(last_col, header_row).String = "Norm_Treffer"
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
if "Norm_Vorschlag" not in existing:
|
if "Norm_Vorschlag" not in existing:
|
||||||
last_col += 1
|
last_col += 1
|
||||||
existing["Norm_Vorschlag"] = last_col
|
existing["Norm_Vorschlag"] = last_col
|
||||||
try:
|
|
||||||
sheet.getCellByPosition(last_col, header_row).String = "Norm_Vorschlag"
|
sheet.getCellByPosition(last_col, header_row).String = "Norm_Vorschlag"
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
if "Norm_ID" not in existing:
|
|
||||||
last_col += 1
|
|
||||||
existing["Norm_ID"] = last_col
|
|
||||||
try:
|
|
||||||
sheet.getCellByPosition(last_col, header_row).String = "Norm_ID"
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
norm_tr_col = existing["Norm_Treffer"]
|
norm_tr_col = existing["Norm_Treffer"]
|
||||||
norm_sug_col = existing["Norm_Vorschlag"]
|
norm_sug_col = existing["Norm_Vorschlag"]
|
||||||
norm_id_col = existing["Norm_ID"]
|
|
||||||
|
|
||||||
# Build norm indexes
|
|
||||||
norm_dict, lemma_index = build_norm_index(NV_MASTER_PATH)
|
norm_dict, lemma_index = build_norm_index(NV_MASTER_PATH)
|
||||||
if not norm_dict and not lemma_index:
|
if not norm_dict and not lemma_index:
|
||||||
log("NV_MASTER leer oder nicht lesbar. Abbruch.")
|
log("NV_MASTER leer oder nicht lesbar. Abbruch.")
|
||||||
return
|
return
|
||||||
|
|
||||||
# colors
|
|
||||||
GREEN = 0xADFF2F
|
GREEN = 0xADFF2F
|
||||||
YELLOW = 0xFFA500
|
YELLOW = 0xFFA500
|
||||||
RED = 0xCC0000
|
RED = 0xCC0000
|
||||||
|
WHITE = 0xFFFFFF
|
||||||
|
|
||||||
# iterate rows
|
|
||||||
rows_processed = 0
|
rows_processed = 0
|
||||||
for r in range(header_row + 1, data_range.EndRow + 1):
|
for r in range(header_row + 1, data_range.EndRow + 1):
|
||||||
try:
|
try:
|
||||||
cell = sheet.getCellByPosition(objekt_col, r)
|
cell = sheet.getCellByPosition(objekt_col, r)
|
||||||
txt = str(cell.String).strip()
|
txt = str(cell.String).strip()
|
||||||
if not txt:
|
if not txt:
|
||||||
# clear any previous outputs? keep existing per spec; skip empty
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# tokenize: split by commas first, then whitespace; filter stopwords and pure numbers
|
|
||||||
clauses = [c.strip() for c in re.split(r",", txt) if c.strip()]
|
clauses = [c.strip() for c in re.split(r",", txt) if c.strip()]
|
||||||
terms = []
|
terms = []
|
||||||
for cl in clauses:
|
for cl in clauses:
|
||||||
@ -356,24 +305,19 @@ def run_mapper_macro():
|
|||||||
continue
|
continue
|
||||||
terms.append(p)
|
terms.append(p)
|
||||||
|
|
||||||
# for each term, get hits/suggestions/ids
|
|
||||||
row_hits = []
|
row_hits = []
|
||||||
row_sugs = []
|
row_sugs = []
|
||||||
row_ids = []
|
unmapped_terms = []
|
||||||
any_unmapped = False # at least one term without hit and without suggestion
|
|
||||||
# We will record for each term
|
|
||||||
for term in terms:
|
for term in terms:
|
||||||
hits, sugs, ids = map_term_with_indexes(term, norm_dict, lemma_index)
|
hits, sugs = map_term_with_indexes(term, norm_dict, lemma_index)
|
||||||
if hits:
|
if hits:
|
||||||
row_hits.extend(hits)
|
row_hits.extend(hits)
|
||||||
|
else:
|
||||||
|
unmapped_terms.append(term)
|
||||||
if sugs:
|
if sugs:
|
||||||
row_sugs.extend(sugs)
|
row_sugs.extend(sugs)
|
||||||
if ids:
|
|
||||||
row_ids.extend(ids)
|
|
||||||
if (not hits) and (not sugs):
|
|
||||||
any_unmapped = True
|
|
||||||
|
|
||||||
# deduplicate preserving order
|
|
||||||
def uniq(seq):
|
def uniq(seq):
|
||||||
seen = set()
|
seen = set()
|
||||||
out = []
|
out = []
|
||||||
@ -385,57 +329,30 @@ def run_mapper_macro():
|
|||||||
|
|
||||||
row_hits = uniq(row_hits)
|
row_hits = uniq(row_hits)
|
||||||
row_sugs = uniq(row_sugs)
|
row_sugs = uniq(row_sugs)
|
||||||
row_ids = uniq(row_ids)
|
|
||||||
|
|
||||||
# write outputs (unlimited lists, joined with " | ")
|
# Farb-Logik für Objektbeschreibung
|
||||||
try:
|
if terms and not unmapped_terms and row_hits:
|
||||||
sheet.getCellByPosition(norm_tr_col, r).String = " | ".join(row_hits)
|
cell.CellBackColor = GREEN
|
||||||
sheet.getCellByPosition(norm_sug_col, r).String = " | ".join(row_sugs)
|
row_sugs = []
|
||||||
sheet.getCellByPosition(norm_id_col, r).String = " | ".join(row_ids)
|
elif row_hits:
|
||||||
except Exception:
|
cell.CellBackColor = YELLOW
|
||||||
pass
|
else:
|
||||||
|
|
||||||
# Coloring rules per new spec:
|
|
||||||
# - Objektbeschreibung cell: RED if any_unmapped else no change (we do not color green/yellow here)
|
|
||||||
# - Norm_Treffer cell: GREEN if all terms matched (i.e., terms non-empty and no term unmapped and at least one hit per term)
|
|
||||||
# - Norm_Vorschlag cell: YELLOW if at least one suggestion exists
|
|
||||||
# Determine "all matched": terms non-empty and every term has at least one hit (we approximated by checking any_unmapped and hits length)
|
|
||||||
all_matched = False
|
|
||||||
if terms:
|
|
||||||
# all_matched if no term without hit and there is at least one hit overall
|
|
||||||
if (not any_unmapped) and row_hits:
|
|
||||||
all_matched = True
|
|
||||||
|
|
||||||
# apply colors
|
|
||||||
try:
|
|
||||||
if any_unmapped:
|
|
||||||
cell.CellBackColor = RED
|
cell.CellBackColor = RED
|
||||||
else:
|
|
||||||
# clear red if previously set? We'll leave unchanged if not set. Optionally set to default 16777215 (white)
|
# Ergebniszellen
|
||||||
pass
|
|
||||||
# Norm_Treffer coloring
|
|
||||||
tr_cell = sheet.getCellByPosition(norm_tr_col, r)
|
tr_cell = sheet.getCellByPosition(norm_tr_col, r)
|
||||||
if all_matched:
|
tr_cell.String = " | ".join(row_hits)
|
||||||
tr_cell.CellBackColor = GREEN
|
tr_cell.CellBackColor = GREEN if row_hits else WHITE
|
||||||
else:
|
|
||||||
# clear color if needed -> set to white
|
|
||||||
tr_cell.CellBackColor = 0xFFFFFF
|
|
||||||
# Norm_Vorschlag coloring
|
|
||||||
sug_cell = sheet.getCellByPosition(norm_sug_col, r)
|
sug_cell = sheet.getCellByPosition(norm_sug_col, r)
|
||||||
if row_sugs:
|
sug_cell.String = " | ".join(row_sugs)
|
||||||
sug_cell.CellBackColor = YELLOW
|
sug_cell.CellBackColor = YELLOW if row_sugs else WHITE
|
||||||
else:
|
|
||||||
sug_cell.CellBackColor = 0xFFFFFF
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
rows_processed += 1
|
rows_processed += 1
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# continue processing other rows; log once
|
log(f"Fehler in Zeile {r}: {e}\n{traceback.format_exc()}")
|
||||||
log(f"Fehler in Zeile {r}: {e}")
|
|
||||||
|
|
||||||
# persist cache
|
|
||||||
try:
|
try:
|
||||||
with open(CACHE_FILE, "w", encoding="utf-8") as f:
|
with open(CACHE_FILE, "w", encoding="utf-8") as f:
|
||||||
json.dump(CACHE, f, ensure_ascii=False, indent=2)
|
json.dump(CACHE, f, ensure_ascii=False, indent=2)
|
||||||
@ -444,5 +361,5 @@ def run_mapper_macro():
|
|||||||
|
|
||||||
log(f"run_mapper_macro fertig. Zeilen verarbeitet: {rows_processed}")
|
log(f"run_mapper_macro fertig. Zeilen verarbeitet: {rows_processed}")
|
||||||
|
|
||||||
# Export for LO
|
# Export für LibreOffice
|
||||||
g_exportedScripts = (run_mapper_macro,)
|
g_exportedScripts = (run_mapper_macro,)
|
||||||
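For reference, the revised row colouring introduced in this hunk boils down to a small decision rule. The sketch below is illustrative only and not part of the committed file; the term lists are invented examples.

# Illustrative sketch of the revised colour logic (not part of the commit).
# GREEN: every term matched; YELLOW: partial matches; RED: no matches at all.
GREEN, YELLOW, RED = 0xADFF2F, 0xFFA500, 0xCC0000

def row_colour(terms, row_hits, unmapped_terms):
    """Mirrors the decision order used for the Objektbeschreibung cell above."""
    if terms and not unmapped_terms and row_hits:
        return GREEN
    if row_hits:
        return YELLOW
    return RED

# Hypothetical example rows:
print(hex(row_colour(["Teller", "Krug"], ["Teller (1.1)", "Krug (1.2)"], [])))  # green
print(hex(row_colour(["Teller", "Xyz"], ["Teller (1.1)"], ["Xyz"])))            # yellow
print(hex(row_colour(["Xyz"], [], ["Xyz"])))                                    # red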
455 Mapper_Makro_Alte_Versionen/mapper_macro_2.2.py Normal file
@@ -0,0 +1,455 @@
# -*- coding: utf-8 -*-
"""
LibreOffice/Excel Macro: NV_MASTER-Abgleich
Version: 2.3
Pfad: libreoffice/4/user/Scripts/python/Vokabular_Abgleich_Makro/mapper_macro_2.3.py

Beschreibung:
-------------
Dieses Python-Makro für LibreOffice/Excel führt einen Abgleich von Begriffen
aus einem aktiven Sheet gegen ein zentral gepflegtes NV_MASTER-Vokabular durch.
Es erstellt Treffer, Vorschläge und markiert die Zellen farblich.

Hauptfunktionen:
----------------
1. Text-Normalisierung und Lemma-Bestimmung
2. Laden des NV_MASTER-Vokabulars und Aufbau von Norm-Index + Lemma-Index
3. Fuzzy Matching (RapidFuzz oder difflib) für Begriffe
4. Treffer- und Vorschlagsbestimmung
5. Mapping auf Sheet:
   - Norm_Treffer (grün)
   - Norm_Vorschlag (gelb)
   - Kein_Treffer (rot)
6. Caching zur Vermeidung mehrfacher Berechnungen
7. Logging in externe Datei

Externe Abhängigkeiten:
-----------------------
- pandas (für ODS/Excel-Leseoperationen)
- spacy (für deutsche Lemma-Bestimmung)
- rapidfuzz (optional für schnellere Fuzzy-String-Matches)

UNO-spezifische Objekte:
------------------------
- XSCRIPTCONTEXT: Bereitgestellt durch LibreOffice zur Laufzeit

Schwachstellen / Optimierungsansätze:
-------------------------------------
- Fehlerbehandlung ist robust, aber teilweise sehr still (z.B. Cache-Fehler, Pandas-Fehler).
- Schleifen über Zellen sind bei großen Sheets langsam (potenziell durch pandas vollständig ersetzen).
- Lemma-Berechnung könnte nur einmal für NV_MASTER und einmal für Sheet durchgeführt werden.
- RapidFuzz optional; fallback auf SequenceMatcher ist deutlich langsamer.
- Cache wird nur am Ende geschrieben; Absturz vor Ende verliert bisherige Ergebnisse.
- Farbwerte sind fest codiert; parametrisieren könnte Flexibilität erhöhen.
- Stopwords sind hart codiert; konfigurierbar wäre effizienter.
- Es werden keine parallelen Abfragen / Batch-Operationen verwendet.
- Logging nur in Datei; LibreOffice-eigene Meldungen oder Fortschrittsanzeige fehlen.
"""

import os
import re
import json
import traceback

# UNO-Context wird zur Laufzeit von LibreOffice bereitgestellt
try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except Exception:
    PANDAS_AVAILABLE = False

try:
    import spacy
    nlp = spacy.load("de_core_news_sm")
    SPACY_AVAILABLE = True
except Exception:
    SPACY_AVAILABLE = False
    nlp = None

try:
    from rapidfuzz import fuzz
    RAPIDFUZZ_AVAILABLE = True
except Exception:
    RAPIDFUZZ_AVAILABLE = False
    from difflib import SequenceMatcher

# ------------------------
# Konfiguration
# ------------------------
BASE_DIR = "/home/jarnold/.config/libreoffice/4/user/Scripts/python/Vokabular_Abgleich_Makro"
NV_MASTER_PATH = os.path.join(BASE_DIR, "NV_MASTER.ods")
LOG_FILE = os.path.join(BASE_DIR, "mapper_macro_2.3.log")
CACHE_FILE = os.path.join(BASE_DIR, "mapper_cache_2.3.json")

STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
CONF_THRESHOLD = 0.75  # Basis-Schwelle für Vorschläge

# ------------------------
# Logging-Funktion
# ------------------------
def log(msg):
    """Schreibt Nachricht in LOG_FILE. Fehler werden ignoriert."""
    try:
        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(msg + "\n")
    except Exception:
        pass

# ------------------------
# Cache laden
# ------------------------
try:
    if os.path.exists(CACHE_FILE):
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            CACHE = json.load(f)
    else:
        CACHE = {}
except Exception:
    CACHE = {}

# ------------------------
# Text-Normalisierung & Lemma
# ------------------------
def normalize_text(s):
    """Entfernt Sonderzeichen, multiple Whitespaces, wandelt in lowercase."""
    if not s:
        return ""
    s = str(s).strip().lower()
    s = re.sub(r"[\(\)\[\]\"'\\;:\?!,\.]", "", s)
    s = re.sub(r"\s+", " ", s)
    return s

lemma_cache = {}
def lemmatize_term(term):
    """Lemmatisiert einen Begriff mit SpaCy. Falls nicht verfügbar, Rückgabe Normalized String."""
    term_norm = normalize_text(term)
    if term_norm in lemma_cache:
        return lemma_cache[term_norm]
    if SPACY_AVAILABLE and nlp:
        try:
            doc = nlp(term_norm)
            lemma = " ".join([token.lemma_ for token in doc])
        except Exception:
            lemma = term_norm
    else:
        lemma = term_norm
    lemma_cache[term_norm] = lemma
    return lemma

# ------------------------
# NV_MASTER laden
# ------------------------
def build_norm_index(nv_path):
    """
    Liest NV_MASTER ein und erstellt:
    - norm_dict: Normalisierte Begriffe -> Einträge mit Name, ID, Sheet
    - lemma_index: Lemma -> Einträge
    """
    norm_dict = {}
    lemma_index = {}
    if not PANDAS_AVAILABLE:
        log("Pandas nicht verfügbar. NV_MASTER kann nicht gelesen werden.")
        return norm_dict, lemma_index
    try:
        sheets = pd.read_excel(nv_path, sheet_name=None, engine="odf")
    except Exception as e:
        log(f"Fehler beim Einlesen NV_MASTER: {e}")
        return norm_dict, lemma_index

    for sheet_name, df in sheets.items():
        if str(sheet_name).strip().lower() == "master":
            continue
        df = df.fillna("")
        cols = [str(c).strip().lower() for c in df.columns]
        id_col = None
        word_col = None
        for i, c in enumerate(cols):
            if "id" in c:
                id_col = df.columns[i]
            if "wort" in c or "vokabel" in c:
                word_col = df.columns[i]
        if word_col is None and len(df.columns) >= 1:
            word_col = df.columns[-1]
        if id_col is None and len(df.columns) >= 1:
            id_col = df.columns[0]

        current_parent_id = None
        for _, row in df.iterrows():
            id_val = str(row[id_col]).strip() if id_col in df.columns else ""
            word_val = str(row[word_col]).strip() if word_col in df.columns else ""
            if id_val:
                current_parent_id = id_val
            if not word_val:
                continue
            norm_name = normalize_text(word_val)
            lemma = lemmatize_term(word_val)
            entry = {"Name": word_val.strip(), "ID": current_parent_id or "", "Sheet": sheet_name}
            norm_dict.setdefault(norm_name, []).append(entry)
            lemma_index.setdefault(lemma, []).append(entry)
    log(f"NV_MASTER geladen ({NV_MASTER_PATH}). Begriffe: {sum(len(v) for v in norm_dict.values())}")
    return norm_dict, lemma_index

# ------------------------
# Matching-Funktionen
# ------------------------
def fuzzy_score(a, b):
    """Berechnet Fuzzy-Score zwischen zwei Strings. RapidFuzz oder fallback SequenceMatcher."""
    if RAPIDFUZZ_AVAILABLE:
        try:
            return fuzz.token_set_ratio(a, b) / 100.0
        except Exception:
            return 0.0
    else:
        try:
            return SequenceMatcher(None, a.lower(), b.lower()).ratio()
        except Exception:
            return 0.0

def get_suggestions_for_term(term_lemma, norm_dict, lemma_index, threshold=CONF_THRESHOLD):
    """
    Liefert Vorschläge für ein Lemma, wenn kein exakter Treffer existiert.
    Score-basierte Sortierung, Duplikate werden entfernt.
    """
    candidates = []
    for key_lemma, entries in lemma_index.items():
        score = fuzzy_score(term_lemma, key_lemma)
        if key_lemma.startswith(term_lemma):
            score = min(score + 0.1, 1.0)
        if score >= threshold:
            for e in entries:
                candidates.append((score, e["Name"], e["ID"]))
    for norm_key, entries in norm_dict.items():
        score = fuzzy_score(term_lemma, norm_key)
        if norm_key.startswith(term_lemma):
            score = min(score + 0.1, 1.0)
        if score >= threshold:
            for e in entries:
                candidates.append((score, e["Name"], e["ID"]))
    candidates.sort(key=lambda t: t[0], reverse=True)
    seen = set()
    results = []
    for score, name, id_ in candidates:
        key = (name, id_)
        if key in seen:
            continue
        seen.add(key)
        results.append({"score": score, "name": name, "id": id_})
    return [f'{r["name"]} ({r["id"]})' if r["id"] else r["name"] for r in results]

def map_term_with_indexes(term, norm_dict, lemma_index):
    """
    Mappt einen Term auf NV_MASTER:
    - Treffer
    - Vorschläge
    - IDs
    Nutzt Cache, um Wiederholungen zu vermeiden.
    """
    term_norm = normalize_text(term)
    term_lemma = lemmatize_term(term)
    if term_lemma in CACHE:
        cached = CACHE[term_lemma]
        return cached.get("hits", []), cached.get("suggestions", []), cached.get("ids", [])
    hits = []
    suggestions = []
    ids = []
    if term_norm in norm_dict:
        for e in norm_dict[term_norm]:
            hits.append(e["Name"])
            if e["ID"]:
                ids.append(e["ID"])
    if not hits and term_lemma in lemma_index:
        for e in lemma_index[term_lemma]:
            hits.append(e["Name"])
            if e["ID"]:
                ids.append(e["ID"])
    if not hits:
        suggestions = get_suggestions_for_term(term_lemma, norm_dict, lemma_index, threshold=CONF_THRESHOLD)

    # Duplikate entfernen
    def unique_preserve(seq):
        seen = set()
        out = []
        for x in seq:
            if x not in seen:
                seen.add(x)
                out.append(x)
        return out

    hits = unique_preserve(hits)
    suggestions = unique_preserve(suggestions)
    ids = unique_preserve(ids)
    CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}
    return hits, suggestions, ids

# ------------------------
# Haupt-Makro
# ------------------------
def run_mapper_macro():
    """
    Haupt-Makro für LibreOffice:
    1. Bestimmt Header + Spalten
    2. Fügt Spalten für Norm_Treffer, Norm_Vorschlag, Kein_Treffer hinzu
    3. Liest NV_MASTER und baut Indizes
    4. Iteriert über Zeilen und Terms
    5. Markiert Zellen farblich (grün/gelb/rot)
    6. Schreibt Cache am Ende
    """
    try:
        doc = XSCRIPTCONTEXT.getDocument()
        sheet = doc.CurrentController.ActiveSheet
        cursor = sheet.createCursor()
        cursor.gotoStartOfUsedArea(False)
        cursor.gotoEndOfUsedArea(True)
        data_range = cursor.getRangeAddress()
    except Exception as e:
        log("Fehler: konnte Dokument/Sheet nicht öffnen: " + str(e))
        return

    # Header finden
    header_row = None
    objekt_col = None
    max_col = data_range.EndColumn
    for r in range(0, min(5, data_range.EndRow+1)):
        for c in range(0, max_col+1):
            try:
                val = str(sheet.getCellByPosition(c, r).String).strip().lower()
            except Exception:
                val = ""
            if val == "objektbeschreibung":
                header_row = r
                objekt_col = c
                break
        if objekt_col is not None:
            break
    if objekt_col is None:
        log("Spalte 'Objektbeschreibung' nicht gefunden. Abbruch.")
        return

    # Spalten anlegen, falls nicht vorhanden
    existing = {}
    for c in range(0, data_range.EndColumn+1):
        try:
            h = str(sheet.getCellByPosition(c, header_row).String).strip()
        except Exception:
            h = ""
        if h == "Norm_Treffer":
            existing["Norm_Treffer"] = c
        if h == "Norm_Vorschlag":
            existing["Norm_Vorschlag"] = c
    last_col = data_range.EndColumn
    if "Norm_Treffer" not in existing:
        last_col += 1
        existing["Norm_Treffer"] = last_col
        sheet.getCellByPosition(last_col, header_row).String = "Norm_Treffer"
    if "Norm_Vorschlag" not in existing:
        last_col += 1
        existing["Norm_Vorschlag"] = last_col
        sheet.getCellByPosition(last_col, header_row).String = "Norm_Vorschlag"
    if "Kein_Treffer" not in existing:
        last_col += 1
        existing["Kein_Treffer"] = last_col
        sheet.getCellByPosition(last_col, header_row).String = "Kein_Treffer"

    norm_tr_col = existing["Norm_Treffer"]
    norm_sug_col = existing["Norm_Vorschlag"]
    kein_tr_col = existing["Kein_Treffer"]

    # NV_MASTER laden
    norm_dict, lemma_index = build_norm_index(NV_MASTER_PATH)
    if not norm_dict and not lemma_index:
        log("NV_MASTER leer oder nicht lesbar. Abbruch.")
        return

    # Farben
    GREEN = 0xADFF2F
    YELLOW = 0xFFA500
    RED = 0xCC0000
    WHITE = 0xFFFFFF

    rows_processed = 0
    for r in range(header_row + 1, data_range.EndRow + 1):
        try:
            cell = sheet.getCellByPosition(objekt_col, r)
            txt = str(cell.String).strip()
            if not txt:
                continue

            # Term-Extraktion
            clauses = [c.strip() for c in re.split(r",", txt) if c.strip()]
            terms = []
            for cl in clauses:
                parts = [p.strip() for p in re.split(r"\s+", cl) if p.strip()]
                for p in parts:
                    if p.lower() in STOPWORDS:
                        continue
                    if re.fullmatch(r"\d+", p):
                        continue
                    terms.append(p)

            row_hits = []
            row_sugs = []
            row_ids = []
            unmapped_terms = []

            for term in terms:
                hits, sugs, ids = map_term_with_indexes(term, norm_dict, lemma_index)
                if hits:
                    row_hits.extend([f"{h} ({id_})" if id_ else h for h,id_ in zip(hits, ids + [""]*len(hits))])
                else:
                    unmapped_terms.append(term)
                if sugs:
                    row_sugs.extend([f"{s}" for s in sugs])
                if ids:
                    row_ids.extend(ids)

            def uniq(seq):
                seen = set()
                out = []
                for x in seq:
                    if x not in seen:
                        seen.add(x)
                        out.append(x)
                return out

            row_hits = uniq(row_hits)
            row_sugs = uniq(row_sugs)
            unmapped_terms = uniq(unmapped_terms)

            # Farb-Logik
            if terms and not unmapped_terms and row_hits:
                cell.CellBackColor = GREEN
                row_sugs = []  # keine Vorschläge wenn alles Treffer
            elif row_hits:
                cell.CellBackColor = YELLOW
            else:
                cell.CellBackColor = RED

            # Ergebnisse schreiben
            tr_cell = sheet.getCellByPosition(norm_tr_col, r)
            tr_cell.String = " | ".join(row_hits)
            tr_cell.CellBackColor = GREEN if row_hits else WHITE

            sug_cell = sheet.getCellByPosition(norm_sug_col, r)
            sug_cell.String = " | ".join(row_sugs)
            sug_cell.CellBackColor = YELLOW if row_sugs else WHITE

            kt_cell = sheet.getCellByPosition(kein_tr_col, r)
            kt_cell.String = " | ".join(unmapped_terms)
            kt_cell.CellBackColor = RED if unmapped_terms else WHITE

            rows_processed += 1

        except Exception as e:
            log(f"Fehler in Zeile {r}: {e}\n{traceback.format_exc()}")

    # Cache speichern
    try:
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(CACHE, f, ensure_ascii=False, indent=2)
    except Exception:
        pass

    log(f"run_mapper_macro fertig. Zeilen verarbeitet: {rows_processed}")

# Export für LibreOffice
g_exportedScripts = (run_mapper_macro,)
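The matching core of the macro above (normalize/lemmatize, fuzzy score, threshold, prefix bonus) can be tried outside LibreOffice. Below is a minimal standalone sketch assuming only the difflib fallback path and an invented two-entry vocabulary; it is illustrative and not part of the committed file.

# Minimal standalone sketch of the fuzzy-suggestion idea (assumptions: difflib
# fallback, invented vocabulary entries; not the committed macro code).
from difflib import SequenceMatcher

CONF_THRESHOLD = 0.75

def fuzzy_score(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

vocab = {"kaffeemühle": ("Kaffeemühle", "1.1"), "kaffeekanne": ("Kaffeekanne", "1.2")}

def suggestions(term):
    out = []
    for key, (name, id_) in vocab.items():
        score = fuzzy_score(term, key)
        if key.startswith(term):              # same prefix bonus as in the macro
            score = min(score + 0.1, 1.0)
        if score >= CONF_THRESHOLD:
            out.append((round(score, 2), f"{name} ({id_})"))
    return sorted(out, reverse=True)

print(suggestions("kaffemühle"))   # the typo still scores above the 0.75 threshold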
BIN NV_MASTER.ods
Binary file not shown.
@@ -1,171 +0,0 @@
import os
import re
import logging
import datetime
import pandas as pd
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment
import ezodf

# ----------------- KONFIGURATION -----------------
INPUT_FILE = r"/home/jarnold/projects/GND-Skript Test/Input CSV/Normvokabular_INTERN/NV_MASTER.ods"
MASTER_SHEET_NAME = "Masterstruktur"
today = datetime.datetime.today().strftime("%y.%m.%d")
base, ext = os.path.splitext(INPUT_FILE)
OUTPUT_FILE = f"{base}_Updated_{today}{ext}"

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# ----------------- HILFSFUNKTIONEN -----------------

def load_file(input_file):
    """
    Prüft Dateiformat und gibt für Excel: pd.ExcelFile + Engine zurück,
    für ODS: None + "odf" (da ODS direkt über ezodf gelesen wird).
    """
    ext = os.path.splitext(input_file)[1].lower()
    if ext in [".xlsx", ".xls"]:
        engine = "openpyxl"
        xls = pd.ExcelFile(input_file, engine=engine)
    elif ext == ".ods":
        engine = "odf"
        xls = None  # ODS wird direkt über ezodf gelesen
    else:
        raise ValueError(f"Nicht unterstütztes Dateiformat: {ext}")
    logging.info(f"Lade Datei {input_file} mit Engine '{engine}'")
    return xls, engine

def read_ods_sheet(filename, sheet_name):
    """Liests ODS Sheet sauber ein, inklusive Header."""
    doc = ezodf.opendoc(filename)
    sheet = doc.sheets[sheet_name]
    data = []
    headers = [str(sheet[0, col].value).strip() for col in range(sheet.ncols())]
    for row_idx in range(1, sheet.nrows()):
        row = {}
        empty_row = True
        for col_idx, col_name in enumerate(headers):
            cell_val = sheet[row_idx, col_idx].value
            val = "" if cell_val is None else str(cell_val).strip()
            row[col_name] = val
            if val:
                empty_row = False
        if not empty_row:
            data.append(row)
    df = pd.DataFrame(data, columns=headers)
    return df

def process_category_sheet(df):
    """Erstellt die treppenartige Hierarchie."""
    df = df.copy()
    for col in ["ID","Unterkategorie","Unterunterkategorie","Wort/Vokabel"]:
        if col not in df.columns:
            df[col] = ""
    rows = []
    current_id = ""
    current_uuk = ""
    for _, r in df.iterrows():
        id_val = str(r.get("ID","")).strip()
        uuk_val = str(r.get("Unterunterkategorie","")).strip()
        word_val = str(r.get("Wort/Vokabel","")).strip()

        if id_val:  # Kategoriezeile
            current_id = id_val
            current_uuk = uuk_val or word_val
            rows.append({"ID": current_id, "Unterkategorie": "", "Unterunterkategorie": current_uuk, "Wort/Vokabel": ""})
            continue
        if uuk_val:  # Unterunterkategorie
            current_uuk = uuk_val
            rows.append({"ID": "", "Unterkategorie": "", "Unterunterkategorie": current_uuk, "Wort/Vokabel": ""})
            continue
        if word_val:  # Vokabel
            rows.append({"ID": "", "Unterkategorie": "", "Unterunterkategorie": "", "Wort/Vokabel": word_val})
            continue
    return pd.DataFrame(rows, columns=["ID","Unterkategorie","Unterunterkategorie","Wort/Vokabel"])

def remove_empty_vocabulary_rows(df):
    """Entfernt Zeilen, die nur leere Wort/Vokabel-Spalte haben."""
    return df[df["Wort/Vokabel"].astype(str).str.strip() != ""].copy().reset_index(drop=True)

def sync_master_and_sheets(master_df, category_dfs):
    """Synchronisiert Kategorien nach Master, Vokabeln bleiben erhalten."""
    master_df = master_df.copy()
    master_df["ID"] = master_df["ID"].astype(str).str.strip()
    master_dict = dict(zip(master_df["ID"], master_df["Kategorie"]))
    updated_dfs = {}
    summary = {}

    for sheet_name, df in category_dfs.items():
        rows_out = []
        changes = {"removed":0}
        for _, row in df.iterrows():
            id_val = str(row.get("ID","")).strip()
            if id_val and id_val not in master_dict:
                changes["removed"] +=1
                continue
            rows_out.append(row.to_dict())
        updated_dfs[sheet_name] = pd.DataFrame(rows_out, columns=df.columns)
        summary[sheet_name] = changes

    new_master = pd.DataFrame([{"ID":k,"Kategorie":v} for k,v in sorted(master_dict.items())])
    return new_master, updated_dfs, summary

def save_excel(processed_sheets, output_file):
    from openpyxl import Workbook
    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
        for sheet_name, df in processed_sheets.items():
            df.to_excel(writer, sheet_name=sheet_name, index=False)
            ws = writer.sheets[sheet_name]
            for col_idx, col in enumerate(df.columns,1):
                max_len = max(df[col].astype(str).map(len).max() if len(df)>0 else 0,len(col))+2
                ws.column_dimensions[get_column_letter(col_idx)].width = max_len
                for row_idx in range(1,len(df)+2):
                    ws.cell(row=row_idx,column=col_idx).alignment = Alignment(horizontal='left')

def save_ods(processed_sheets, output_file):
    doc = ezodf.newdoc(doctype="ods", filename=output_file)
    for name, df in processed_sheets.items():
        sheet = ezodf.Sheet(name, size=(len(df)+1,len(df.columns)))
        doc.sheets += sheet
        for col_idx, col_name in enumerate(df.columns):
            sheet[0,col_idx].set_value(col_name)
        for row_idx,row in enumerate(df.itertuples(index=False),start=1):
            for col_idx,value in enumerate(row):
                sheet[row_idx,col_idx].set_value("" if pd.isna(value) else value)
    doc.save()

# ----------------- HAUPTPROGRAMM -----------------
def main():
    xls, engine = load_file(INPUT_FILE)
    if engine == "odf":
        doc = ezodf.opendoc(INPUT_FILE)
        sheet_names = [s.name for s in doc.sheets if s.name != MASTER_SHEET_NAME]
        category_dfs = {name: process_category_sheet(read_ods_sheet(INPUT_FILE,name)) for name in sheet_names}
        master_df = read_ods_sheet(INPUT_FILE, MASTER_SHEET_NAME)
    else:
        sheet_names = [s for s in xls.sheet_names if s != MASTER_SHEET_NAME]
        category_dfs = {}
        for sheet_name in sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name, engine=engine)
            df.columns = [str(c).strip() for c in df.columns]
            category_dfs[sheet_name] = process_category_sheet(df)
        master_df = pd.read_excel(xls, sheet_name=MASTER_SHEET_NAME, engine=engine)
        master_df.columns = [str(c).strip() for c in master_df.columns]

    new_master, updated_dfs, summary = sync_master_and_sheets(master_df, category_dfs)
    processed_sheets = {MASTER_SHEET_NAME:new_master}
    processed_sheets.update({k:remove_empty_vocabulary_rows(v) for k,v in updated_dfs.items()})

    ext_out = os.path.splitext(OUTPUT_FILE)[1].lower()
    if ext_out in [".xlsx",".xls"]:
        save_excel(processed_sheets, OUTPUT_FILE)
    else:
        save_ods(processed_sheets, OUTPUT_FILE)

    logging.info(f"Datei gespeichert: {OUTPUT_FILE}")
    logging.info("===== SYNC SUMMARY =====")
    for sheet, info in summary.items():
        logging.info(f"{sheet}: {info}")

if __name__ == "__main__":
    main()
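The core rule of the deleted script's sync_master_and_sheets is simply: rows whose ID is missing from the Masterstruktur are dropped, while rows without an ID (plain vocabulary rows) are kept. A small sketch with invented sample data, independent of pandas:

# Sketch only (invented sample data): the filtering rule behind sync_master_and_sheets.
master = {"1.1": "Hausrat", "2.4": "Werkzeug"}           # ID -> Kategorie (Masterstruktur)
rows = [
    {"ID": "1.1", "Wort/Vokabel": ""},                   # category row, ID known -> keep
    {"ID": "",    "Wort/Vokabel": "Kaffeemühle"},        # vocabulary row, no ID  -> keep
    {"ID": "9.9", "Wort/Vokabel": ""},                   # ID not in master       -> drop
]
kept = [r for r in rows if not r["ID"] or r["ID"] in master]
removed = len(rows) - len(kept)
print(kept, {"removed": removed})                        # mirrors the per-sheet summary dict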
@ -1,3 +1,32 @@
|
|||||||
|
"""
|
||||||
|
===============================================================================
|
||||||
|
Skriptname: NV_SPOT_Export.py
|
||||||
|
Beschreibung:
|
||||||
|
Dieses Skript soll hierarchische Normvokabular-Tabellen
|
||||||
|
(ODS/XLSX-Format) in eine JSON-basierte SPOT-Struktur (Strukturierter
|
||||||
|
Positionsbaum) konvertieren. Es ermöglicht das Exportieren in Excel und ODS, sowie
|
||||||
|
das nachträgliche Ergänzen von Kategorien, Unterkategorien und Wörtern.
|
||||||
|
|
||||||
|
//NOCH NICHT GETESTET//
|
||||||
|
|
||||||
|
Hauptfunktionen:
|
||||||
|
- Node: Klasse zur Repräsentation von Baumknoten.
|
||||||
|
- load_excel_or_ods: Lädt Tabellen aus ODS/XLSX-Dateien.
|
||||||
|
- process_sheet_to_tree: Erzeugt eine Baumstruktur aus einem Sheet.
|
||||||
|
- save_spot_json: Speichert den SPOT-Baum als JSON.
|
||||||
|
- load_spot_json: Lädt SPOT-Daten aus JSON-Dateien.
|
||||||
|
- export_spot_to_excel: Exportiert den SPOT-Baum nach Excel.
|
||||||
|
- export_spot_to_ods: Exportiert den SPOT-Baum nach ODS.
|
||||||
|
- add_category/subcategory/word: Fügt Elemente im Baum hinzu.
|
||||||
|
- main: Steuert den Workflow.
|
||||||
|
|
||||||
|
Abhängigkeiten:
|
||||||
|
Python 3.x, pandas, openpyxl, ezodf, json, logging, datetime
|
||||||
|
|
||||||
|
Stand: 2025-10-01
|
||||||
|
===============================================================================
|
||||||
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import datetime
|
import datetime
|
||||||
@ -12,6 +41,20 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(
|
|||||||
|
|
||||||
# ---------------- SPOT-Baumstruktur ----------------
|
# ---------------- SPOT-Baumstruktur ----------------
|
||||||
class Node:
|
class Node:
|
||||||
|
"""
|
||||||
|
Repräsentiert einen Knoten in der SPOT-Baumstruktur.
|
||||||
|
|
||||||
|
Attribute:
|
||||||
|
name (str): Anzeigename des Knotens.
|
||||||
|
id (str): Optionale ID (nur für Kategorien).
|
||||||
|
type (str): Knotentyp ("category", "subcategory", "word").
|
||||||
|
children (list[Node]): Unterknoten.
|
||||||
|
|
||||||
|
Methoden:
|
||||||
|
add_child(child): Fügt einen Unterknoten hinzu.
|
||||||
|
to_dict(): Serialisiert den Knoten in ein Dictionary/JSON-kompatibles Format.
|
||||||
|
from_dict(d): Rekonstruiert den Baum aus einem Dictionary.
|
||||||
|
"""
|
||||||
def __init__(self, name, node_type="category", id=None):
|
def __init__(self, name, node_type="category", id=None):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.id = id
|
self.id = id
|
||||||
@ -19,9 +62,11 @@ class Node:
|
|||||||
self.children = []
|
self.children = []
|
||||||
|
|
||||||
def add_child(self, child):
|
def add_child(self, child):
|
||||||
|
"""Fügt dem aktuellen Knoten einen Unterknoten hinzu."""
|
||||||
self.children.append(child)
|
self.children.append(child)
|
||||||
|
|
||||||
def to_dict(self):
|
def to_dict(self):
|
||||||
|
"""Wandelt den Knoten (rekursiv) in ein Dictionary um."""
|
||||||
if self.type == "word":
|
if self.type == "word":
|
||||||
return self.name
|
return self.name
|
||||||
return {
|
return {
|
||||||
@ -33,14 +78,26 @@ class Node:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_dict(d):
|
def from_dict(d):
|
||||||
|
"""Erzeugt aus einem Dictionary ein Node-Objekt (rekursiv)."""
|
||||||
if isinstance(d, str):
|
if isinstance(d, str):
|
||||||
return Node(d, "word")
|
return Node(d, "word")
|
||||||
node = Node(d["name"], d.get("type", "category"), d.get("id"))
|
node = Node(d["name"], d.get("type", "category"), d.get("id"))
|
||||||
node.children = [Node.from_dict(c) for c in d.get("children", [])]
|
node.children = [Node.from_dict(c) for c in d.get("children", [])]
|
||||||
return node
|
return node
|
||||||
|
|
||||||
|
|
||||||
# ---------------- Funktionen zum Laden ----------------
|
# ---------------- Funktionen zum Laden ----------------
|
||||||
def load_excel_or_ods(input_file, master_sheet="Masterstruktur"):
|
def load_excel_or_ods(input_file, master_sheet="Masterstruktur"):
|
||||||
|
"""
|
||||||
|
Lädt ODS oder Excel-Datei und gibt Master- sowie Kategorien-DataFrames zurück.
|
||||||
|
|
||||||
|
Parameter:
|
||||||
|
input_file (str): Pfad zur Quelldatei.
|
||||||
|
master_sheet (str): Name des Masterblattes.
|
||||||
|
|
||||||
|
Rückgabe:
|
||||||
|
(master_df, dfs): Master-DataFrame und Dictionary mit anderen Sheets.
|
||||||
|
"""
|
||||||
ext = os.path.splitext(input_file)[1].lower()
|
ext = os.path.splitext(input_file)[1].lower()
|
||||||
engine = "openpyxl" if ext in [".xlsx", ".xls"] else "odf"
|
engine = "openpyxl" if ext in [".xlsx", ".xls"] else "odf"
|
||||||
xls = pd.ExcelFile(input_file, engine=engine)
|
xls = pd.ExcelFile(input_file, engine=engine)
|
||||||
@ -49,26 +106,44 @@ def load_excel_or_ods(input_file, master_sheet="Masterstruktur"):
|
|||||||
master_df = pd.read_excel(xls, sheet_name=master_sheet, engine=engine)
|
master_df = pd.read_excel(xls, sheet_name=master_sheet, engine=engine)
|
||||||
return master_df, dfs
|
return master_df, dfs
|
||||||
|
|
||||||
|
|
||||||
# ---------------- Baum aus Sheet erstellen ----------------
|
# ---------------- Baum aus Sheet erstellen ----------------
|
||||||
def process_sheet_to_tree(df):
|
def process_sheet_to_tree(df):
|
||||||
|
"""
|
||||||
|
Wandelt ein Kategoriensheet in eine hierarchische Baumstruktur (Liste von Nodes) um.
|
||||||
|
|
||||||
|
Struktur:
|
||||||
|
Kategorie → Unterkategorie → Wort
|
||||||
|
|
||||||
|
Parameter:
|
||||||
|
df (pd.DataFrame): Eingabedaten mit Spalten ["ID", "Unterkategorie",
|
||||||
|
"Unterunterkategorie", "Wort/Vokabel"].
|
||||||
|
|
||||||
|
Rückgabe:
|
||||||
|
list[Node]: Liste von Baumknoten der obersten Ebene.
|
||||||
|
"""
|
||||||
df = df.fillna("").astype(str)
|
df = df.fillna("").astype(str)
|
||||||
tree_nodes = []
|
tree_nodes = []
|
||||||
current_cat = None
|
current_cat = None
|
||||||
current_sub = None
|
current_sub = None
|
||||||
|
|
||||||
for idx, row in df.iterrows():
|
for idx, row in df.iterrows():
|
||||||
id_val = row.get("ID", "").strip()
|
id_val = row.get("ID", "").strip()
|
||||||
uk_val = row.get("Unterkategorie", "").strip()
|
uk_val = row.get("Unterkategorie", "").strip()
|
||||||
uuk_val = row.get("Unterunterkategorie", "").strip()
|
uuk_val = row.get("Unterunterkategorie", "").strip()
|
||||||
word_val = row.get("Wort/Vokabel", "").strip()
|
word_val = row.get("Wort/Vokabel", "").strip()
|
||||||
|
|
||||||
|
# Neue Kategorieebene
|
||||||
if id_val:
|
if id_val:
|
||||||
current_cat = Node(uk_val or word_val, "category", id=id_val)
|
current_cat = Node(uk_val or word_val, "category", id=id_val)
|
||||||
tree_nodes.append(current_cat)
|
tree_nodes.append(current_cat)
|
||||||
current_sub = None
|
current_sub = None
|
||||||
|
# Unterkategorie
|
||||||
elif uuk_val:
|
elif uuk_val:
|
||||||
current_sub = Node(uuk_val, "subcategory")
|
current_sub = Node(uuk_val, "subcategory")
|
||||||
if current_cat:
|
if current_cat:
|
||||||
current_cat.add_child(current_sub)
|
current_cat.add_child(current_sub)
|
||||||
|
# Wortebene
|
||||||
elif word_val:
|
elif word_val:
|
||||||
word_node = Node(word_val, "word")
|
word_node = Node(word_val, "word")
|
||||||
if current_sub:
|
if current_sub:
|
||||||
@ -77,28 +152,60 @@ def process_sheet_to_tree(df):
|
|||||||
current_cat.add_child(word_node)
|
current_cat.add_child(word_node)
|
||||||
return tree_nodes
|
return tree_nodes
|
||||||
|
|
||||||
|
|
||||||
# ---------------- SPOT laden/speichern ----------------
|
# ---------------- SPOT laden/speichern ----------------
|
||||||
def save_spot_json(tree_nodes, file_path):
|
def save_spot_json(tree_nodes, file_path):
|
||||||
|
"""
|
||||||
|
Speichert den SPOT-Baum als JSON-Datei.
|
||||||
|
|
||||||
|
Parameter:
|
||||||
|
tree_nodes (list[Node]): Wurzelknoten der Baumstruktur.
|
||||||
|
file_path (str): Zielpfad.
|
||||||
|
"""
|
||||||
with open(file_path, "w", encoding="utf-8") as f:
|
with open(file_path, "w", encoding="utf-8") as f:
|
||||||
json.dump([n.to_dict() for n in tree_nodes], f, indent=2, ensure_ascii=False)
|
json.dump([n.to_dict() for n in tree_nodes], f, indent=2, ensure_ascii=False)
|
||||||
logging.info(f"SPOT gespeichert: {file_path}")
|
logging.info(f"SPOT gespeichert: {file_path}")
|
||||||
|
|
||||||
|
|
||||||
def load_spot_json(file_path):
|
def load_spot_json(file_path):
|
||||||
|
"""
|
||||||
|
Lädt SPOT-JSON-Datei und rekonstruiert den Baum.
|
||||||
|
|
||||||
|
Parameter:
|
||||||
|
file_path (str): Pfad zur JSON-Datei.
|
||||||
|
|
||||||
|
Rückgabe:
|
||||||
|
list[Node]: Liste oberster Knoten.
|
||||||
|
"""
|
||||||
with open(file_path, "r", encoding="utf-8") as f:
|
with open(file_path, "r", encoding="utf-8") as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
return [Node.from_dict(n) for n in data]
|
return [Node.from_dict(n) for n in data]
|
||||||
|
|
||||||
|
|
||||||
# ---------------- Export in Excel ----------------
|
# ---------------- Export in Excel ----------------
|
||||||
def export_spot_to_excel(tree_nodes, output_file):
|
def export_spot_to_excel(tree_nodes, output_file):
|
||||||
|
"""
|
||||||
|
Exportiert den SPOT-Baum in eine Excel-Datei.
|
||||||
|
|
||||||
|
Struktur:
|
||||||
|
Spalten A–D: ID, Kategorie, Unterkategorie, Wort.
|
||||||
|
|
||||||
|
Parameter:
|
||||||
|
tree_nodes (list[Node]): Baumstruktur.
|
||||||
|
output_file (str): Zielpfad der Excel-Datei.
|
||||||
|
"""
|
||||||
wb = Workbook()
|
wb = Workbook()
|
||||||
wb.remove(wb.active)
|
wb.remove(wb.active)
|
||||||
|
|
||||||
for node in tree_nodes:
|
for node in tree_nodes:
|
||||||
ws = wb.create_sheet(title=node.name[:31])
|
ws = wb.create_sheet(title=node.name[:31])
|
||||||
row_idx = 1
|
row_idx = 1
|
||||||
# Kategorie
|
|
||||||
|
# Kategoriezeile
|
||||||
ws.cell(row=row_idx, column=1, value=node.id)
|
ws.cell(row=row_idx, column=1, value=node.id)
|
||||||
ws.cell(row=row_idx, column=2, value=node.name)
|
ws.cell(row=row_idx, column=2, value=node.name)
|
||||||
row_idx += 1
|
row_idx += 1
|
||||||
|
|
||||||
for sub in node.children:
|
for sub in node.children:
|
||||||
if sub.type == "subcategory":
|
if sub.type == "subcategory":
|
||||||
ws.cell(row=row_idx, column=3, value=sub.name)
|
ws.cell(row=row_idx, column=3, value=sub.name)
|
||||||
@ -109,54 +216,99 @@ def export_spot_to_excel(tree_nodes, output_file):
|
|||||||
elif sub.type == "word":
|
elif sub.type == "word":
|
||||||
ws.cell(row=row_idx, column=4, value=sub.name)
|
ws.cell(row=row_idx, column=4, value=sub.name)
|
||||||
row_idx += 1
|
row_idx += 1
|
||||||
# Spaltenbreiten anpassen
|
|
||||||
for col_idx, col_letter in enumerate(["A","B","C","D"],1):
|
# Spaltenbreiten und Ausrichtung
|
||||||
|
for col_idx, col_letter in enumerate(["A", "B", "C", "D"], 1):
|
||||||
ws.column_dimensions[col_letter].width = 20
|
ws.column_dimensions[col_letter].width = 20
|
||||||
for r in range(1,row_idx):
|
for r in range(1, row_idx):
|
||||||
ws.cell(r,col_idx).alignment = Alignment(horizontal='left')
|
ws.cell(r, col_idx).alignment = Alignment(horizontal='left')
|
||||||
|
|
||||||
wb.save(output_file)
|
wb.save(output_file)
|
||||||
logging.info(f"Excel exportiert: {output_file}")
|
logging.info(f"Excel exportiert: {output_file}")
|
||||||
|
|
||||||
|
|
||||||
# ---------------- Export in ODS ----------------
|
# ---------------- Export in ODS ----------------
|
||||||
def export_spot_to_ods(tree_nodes, output_file):
|
def export_spot_to_ods(tree_nodes, output_file):
|
||||||
|
"""
|
||||||
|
Exportiert den SPOT-Baum in eine ODS-Datei.
|
||||||
|
|
||||||
|
Struktur analog zum Excel-Export.
|
||||||
|
|
||||||
|
Parameter:
|
||||||
|
tree_nodes (list[Node]): Baumstruktur.
|
||||||
|
output_file (str): Zielpfad der ODS-Datei.
|
||||||
|
"""
|
||||||
doc = ezodf.newdoc(doctype="ods", filename=output_file)
|
doc = ezodf.newdoc(doctype="ods", filename=output_file)
|
||||||
|
|
||||||
for node in tree_nodes:
|
for node in tree_nodes:
|
||||||
sheet = ezodf.Sheet(node.name[:31], size=(len(node.children)+10,4))
|
sheet = ezodf.Sheet(node.name[:31], size=(len(node.children) + 10, 4))
|
||||||
doc.sheets += sheet
|
doc.sheets += sheet
|
||||||
sheet[0,0].set_value("ID")
|
|
||||||
sheet[0,1].set_value("Unterkategorie")
|
sheet[0, 0].set_value("ID")
|
||||||
sheet[0,2].set_value("Unterunterkategorie")
|
sheet[0, 1].set_value("Unterkategorie")
|
||||||
sheet[0,3].set_value("Wort/Vokabel")
|
sheet[0, 2].set_value("Unterunterkategorie")
|
||||||
|
sheet[0, 3].set_value("Wort/Vokabel")
|
||||||
|
|
||||||
row_idx = 1
|
row_idx = 1
|
||||||
sheet[row_idx,0].set_value(node.id)
|
sheet[row_idx, 0].set_value(node.id)
|
||||||
sheet[row_idx,1].set_value(node.name)
|
sheet[row_idx, 1].set_value(node.name)
|
||||||
row_idx +=1
|
row_idx += 1
|
||||||
|
|
||||||
for sub in node.children:
|
for sub in node.children:
|
||||||
if sub.type == "subcategory":
|
if sub.type == "subcategory":
|
||||||
sheet[row_idx,2].set_value(sub.name)
|
sheet[row_idx, 2].set_value(sub.name)
|
||||||
row_idx +=1
|
row_idx += 1
|
||||||
for word in sub.children:
|
for word in sub.children:
|
||||||
sheet[row_idx,3].set_value(word.name)
|
sheet[row_idx, 3].set_value(word.name)
|
||||||
row_idx +=1
|
row_idx += 1
|
||||||
elif sub.type == "word":
|
elif sub.type == "word":
|
||||||
sheet[row_idx,3].set_value(sub.name)
|
sheet[row_idx, 3].set_value(sub.name)
|
||||||
row_idx +=1
|
row_idx += 1
|
||||||
|
|
||||||
doc.save()
|
doc.save()
|
||||||
logging.info(f"ODS exportiert: {output_file}")
|
logging.info(f"ODS exportiert: {output_file}")
|
||||||
|
|
||||||
|
|
||||||
# ---------------- CLI-Funktionen zum Editieren ----------------
def add_category(tree_nodes, cat_id, cat_name):
    """
    Fügt eine neue Kategorie zum SPOT-Baum hinzu.

    Parameter:
        tree_nodes (list[Node]): Liste der obersten Knoten.
        cat_id (str): ID der Kategorie.
        cat_name (str): Name der Kategorie.
    """
    tree_nodes.append(Node(cat_name, "category", id=cat_id))
    logging.info(f"Kategorie hinzugefügt: {cat_id} {cat_name}")


def add_subcategory(tree_nodes, cat_id, sub_name):
    """
    Fügt einer vorhandenen Kategorie eine Unterkategorie hinzu.

    Parameter:
        tree_nodes (list[Node]): Wurzelknoten.
        cat_id (str): Zielkategorie-ID.
        sub_name (str): Name der Unterkategorie.
    """
    for cat in tree_nodes:
        if cat.id == cat_id:
            cat.add_child(Node(sub_name, "subcategory"))
            logging.info(f"Unterkategorie hinzugefügt: {sub_name} in {cat_id}")
            return


def add_word(tree_nodes, cat_id, sub_name, word_name):
    """
    Fügt einem Unterknoten ein Wort hinzu.

    Parameter:
        tree_nodes (list[Node]): Wurzelknoten.
        cat_id (str): ID der Kategorie.
        sub_name (str): Name der Unterkategorie.
        word_name (str): Neues Wort.
    """
    for cat in tree_nodes:
        if cat.id == cat_id:
            for sub in cat.children:
@@ -165,9 +317,18 @@ def add_word(tree_nodes, cat_id, sub_name, word_name):
                logging.info(f"Wort hinzugefügt: {word_name} unter {sub_name}")
                return


# ---------------- HAUPTPROGRAMM ----------------
def main():
    """
    Ablauf:
    1. Liest Masterdatei (ODS oder XLSX).
    2. Wandelt Kategorienblätter in SPOT-Struktur um.
    3. Speichert SPOT als JSON.
    4. Exportiert SPOT nach Excel und ODS.
    5. Optional: Bearbeiten des Baums über CLI-Funktionen.
    """
    INPUT_FILE = "NV_MASTER.ods"
    OUTPUT_SPOT = "nv_spot.json"
    today = datetime.datetime.today().strftime("%y.%m.%d")
    OUTPUT_EXCEL = f"NV_MASTER_SPOT_{today}.xlsx"
@@ -177,9 +338,10 @@ def main():
    spot_tree = []
    for sheet, df in dfs.items():
        spot_tree.extend(process_sheet_to_tree(df))

    save_spot_json(spot_tree, OUTPUT_SPOT)

    # Beispielhafte Nutzung der Editierfunktionen:
    # add_category(spot_tree, "10.1", "Neue Kategorie")
    # add_subcategory(spot_tree, "10.1", "Neue Unterunterkategorie")
    # add_word(spot_tree, "10.1", "Neue Unterunterkategorie", "Neues Wort")
@@ -188,5 +350,6 @@ def main():
    export_spot_to_ods(spot_tree, OUTPUT_ODS)
    logging.info("SPOT-Workflow abgeschlossen.")


if __name__ == "__main__":
    main()

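# Minimal sketch of the Node type assumed by the tree functions above. The actual class is not
# shown in this excerpt; attribute and method names are inferred from their use and should be
# read as an illustration, not the original definition:
#
# class Node:
#     def __init__(self, name, type, id=None):
#         self.name = name
#         self.type = type          # "category", "subcategory" or "word"
#         self.id = id
#         self.children = []
#
#     def add_child(self, child):
#         self.children.append(child)
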
@@ -1,13 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
NormVokabular Mapper – Version 1.4.2

Dieses Skript normalisiert und mappt Begriffe aus Input-Dateien auf ein zentrales Normvokabular
und führt optional API-Abgleiche mit GND und Wikidata durch. Ergebnisse werden in Excel/ODS gespeichert.
"""

from __future__ import annotations
@@ -25,50 +22,52 @@ from collections import defaultdict
from difflib import SequenceMatcher
from datetime import datetime

# Optional Libraries
try:
    from rapidfuzz import fuzz  # für schnellere String-Similarity
    RAPIDFUZZ_AVAILABLE = True
except Exception:
    RAPIDFUZZ_AVAILABLE = False

try:
    import spacy
    nlp = spacy.load("de_core_news_sm")  # deutsche Lemmatisierung
    SPACY_AVAILABLE = True
except Exception:
    SPACY_AVAILABLE = False
    nlp = None

# =========================
# Konfiguration & Pfade
# =========================
INPUT_DIR = Path("Input CSV")                 # Eingabeverzeichnis
OUTPUT_DIR = Path("Auswertung Ergebnisse")    # Ausgabeordner
OUTPUT_DIR.mkdir(exist_ok=True)               # Verzeichnis erstellen, falls nicht vorhanden
NORMVOC_FILE = Path("Input CSV/Normvokabular_INTERN/NV_MASTER.ods")  # Normvokabular-Datei
CACHE_FILE = "api_cache.json"                 # Cache für API-Antworten
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
CONF_THRESHOLD = 0.75                         # Threshold für Vorschläge
TIMEOUT_DEFAULT = 5
MAX_RETRIES_DEFAULT = 3
BACKOFF_FACTOR_DEFAULT = 2
HEADERS = {"User-Agent": "VLG_API_multi/1.0 (projekt@example.com)"}
API_ACTIVE = {"gnd": True, "wikidata": True}  # API-Verfügbarkeit
FAIL_COUNTER = {"gnd": 0, "wikidata": 0}

# Logging-Parameter
LOG_FILE = OUTPUT_DIR / "mapper_log.txt"
LOG_BATCH_SIZE = 100        # Anzahl Logs vor Flush
LOG_FLUSH_INTERVAL = 5.0    # Sekunden zwischen Flushes
LOG_LEVEL = "DEBUG"         # Logging-Level

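# A rough sketch of what these retry defaults imply (assumption: request_with_retries_generic
# further below backs off exponentially, i.e. waits of BACKOFF_FACTOR_DEFAULT ** retry):
# with MAX_RETRIES_DEFAULT = 3 and BACKOFF_FACTOR_DEFAULT = 2, a failing request would wait
# roughly 2 s and then 4 s between attempts before the call finally gives up.
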
# =========================
# Batch/Buffered Logger
# =========================
class BatchLogger:
    """
    Buffered Logger: Speichert Logs in einem Queue-Buffer und schreibt sie periodisch in Datei und Konsole.
    Reduziert I/O-Aufwand bei vielen Logs.
    """
    def __init__(self, logfile: Path, flush_interval: float = 5.0, batch_size: int = 100, level: str = "DEBUG"):
        self.logfile = logfile
        self.flush_interval = flush_interval
@@ -77,7 +76,7 @@ class BatchLogger:
        self.q = queue.Queue()
        self._stop_event = threading.Event()
        self._thread = threading.Thread(target=self._worker, daemon=True, name="BatchLoggerThread")
        # Sicherstellen, dass die Log-Datei existiert
        try:
            logfile.parent.mkdir(parents=True, exist_ok=True)
            logfile.touch(exist_ok=True)
@@ -86,35 +85,33 @@ class BatchLogger:
        self._thread.start()

    def _format(self, level: str, msg: str) -> str:
        """Formatiert Logeinträge mit Timestamp"""
        ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return f"{ts} - {level} - {msg}"

    def log(self, level: str, msg: str):
        """Fügt Log dem Queue hinzu und löst Flush aus, falls Batchgröße erreicht"""
        if self._stop_event.is_set():
            return
        formatted = self._format(level, msg)
        self.q.put((level, formatted))
        if self.q.qsize() >= self.batch_size:
            self.q.put(("__FLUSH__", "__FLUSH__"))

    def debug(self, msg: str):
        if LOG_LEVEL in ("DEBUG",):
            self.log("DEBUG", msg)

    def info(self, msg: str):
        self.log("INFO", msg)

    def warning(self, msg: str):
        self.log("WARNING", msg)

    def error(self, msg: str):
        self.log("ERROR", msg)

    def exception(self, msg: str):
        self.log("EXCEPTION", msg)

    def _worker(self):
        """Hintergrund-Thread: verarbeitet Queue, schreibt Logs periodisch"""
        buffer = []
        last_flush = time.time()
        while not self._stop_event.is_set() or not self.q.empty():
@@ -123,7 +120,6 @@ class BatchLogger:
            try:
                item = self.q.get(timeout=self.flush_interval)
            except queue.Empty:
                if buffer:
                    self._flush_buffer(buffer)
                    buffer = []
@@ -141,36 +137,30 @@ class BatchLogger:
                    continue
                buffer.append((level, formatted))

                if len(buffer) >= self.batch_size or (time.time() - last_flush) >= self.flush_interval:
                    self._flush_buffer(buffer)
                    buffer = []
                    last_flush = time.time()
            except Exception as e:
                try:
                    sys.stderr.write(f"BatchLogger worker error: {e}\n")
                except Exception:
                    pass
                time.sleep(0.5)
        if buffer:
            self._flush_buffer(buffer)

    def _flush_buffer(self, buffer):
        """Schreibt Puffer in Datei und Konsole"""
        if not buffer:
            return
        try:
            out_lines = [f"{line}\n" for _, line in buffer]
            try:
                sys.stdout.writelines(out_lines)
                sys.stdout.flush()
            except Exception:
                pass
            try:
                with open(self.logfile, "a", encoding="utf-8") as f:
                    f.writelines(out_lines)
@@ -183,17 +173,17 @@ class BatchLogger:
                pass

    def stop(self):
        """Stoppt Logger-Thread"""
        self._stop_event.set()
        try:
            self.q.put(("__FLUSH__", "__FLUSH__"))
        except Exception:
            pass
        self._thread.join(timeout=5.0)

# Logger-Instanz erstellen
logger = BatchLogger(LOG_FILE, flush_interval=LOG_FLUSH_INTERVAL, batch_size=LOG_BATCH_SIZE, level=LOG_LEVEL)
logger.info("Starte NormVokabular Mapper v1.4.2 (Batch-Logging aktiv)")

# =========================
# Cache laden/speichern
# =========================
@@ -210,6 +200,7 @@ else:
    CACHE = {}

def save_cache():
    """Speichert aktuellen Cache in JSON"""
    try:
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(CACHE, f, indent=2, ensure_ascii=False)
@@ -221,6 +212,7 @@ def save_cache():
# Normalisierung / Lemma / Tokenization
# =========================
def normalize_text(s):
    """Text in Kleinbuchstaben, Sonderzeichen entfernen, Trim"""
    if not s:
        return ""
    s = str(s).lower().strip()
@@ -229,8 +221,8 @@ def normalize_text(s):
    return s

lemma_cache = {}

def lemmatize_term(term):
    """Lemmatize mit spaCy, Cache für Performance"""
    term_norm = normalize_text(term)
    if term_norm in lemma_cache:
        return lemma_cache[term_norm]
@@ -246,6 +238,7 @@ def lemmatize_term(term):
    return lemma

def compound_split(term):
    """Splittet Komposita nach -, _, / oder Leerzeichen"""
    if not term:
        return []
    parts = [p for p in re.split(r"[\s\-_/]+", term) if p]
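# Illustrative sketch of the normalisation helpers above (assumed example values, not part of the script):
#   normalize_text("Wappen-Exlibris, koloriert")  ->  "wappen-exlibris koloriert"
#   compound_split("wappen-exlibris koloriert")   ->  ["wappen", "exlibris", "koloriert"]
#   lemmatize_term("Wappen") returns the spaCy lemma of the normalised term (cached in lemma_cache), e.g. "wappen".
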
@@ -255,24 +248,29 @@ def compound_split(term):
# Normvokabular laden & Index
# =========================
def load_normvokabular(file_path):
    """Lädt Normvokabular aus Excel/ODS, erstellt Dictionarys für Mapping"""
    try:
        sheets = pd.read_excel(file_path, sheet_name=None, engine="odf" if file_path.suffix.lower() == ".ods" else None)
    except Exception as e:
        logger.error(f"Normvokabular konnte nicht geladen werden: {e}")
        raise

    norm_dict = {}
    stem_index = defaultdict(list)
    lemma_norm_map = {}

    for sheet_name, df in sheets.items():
        if sheet_name.lower() in ["master", "übersicht"]:
            continue  # Übersichtsblätter ignorieren
        df = df.dropna(how="all", axis=1)
        df.columns = [str(c).strip() for c in df.columns]

        # ID- und Wort-Spalte finden
        id_col = next((c for c in df.columns if "ID" in c), None)
        word_col = next((c for c in df.columns if "Wort" in c or "Vokabel" in c or "Begriff" in c), None)
        if not id_col or not word_col:
            continue

        current_parent_id = None
        for _, row in df.iterrows():
            row_id = str(row[id_col]).strip() if pd.notna(row[id_col]) else None
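# Note on the three return values (their shapes are assumed from how map_to_norm and
# get_suggestions use them further below; the middle of this loop is not shown in the diff):
#   norm_dict      - direct lookup: normalised term -> norm entry (name / ID)
#   stem_index     - stem -> list of candidate entries, for partial and compound matches
#   lemma_norm_map - lemma -> entry, used by get_suggestions() for fuzzy suggestions
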
@@ -296,6 +294,10 @@ def load_normvokabular(file_path):
# Mapping & Vorschläge
# =========================
def map_to_norm(term, norm_dict, stem_index, lemma_norm_map, top_n=3):
    """
    Mappt einen Begriff auf Normvokabular.
    Prüft exakte Treffer, Lemma-Treffer, Komposita und generiert Vorschläge.
    """
    term_norm = normalize_text(term)
    term_lemma = lemmatize_term(term)

@@ -329,6 +331,7 @@ def map_to_norm(term, norm_dict, stem_index, lemma_norm_map, top_n=3):
    return "KEIN TREFFER", "", combined_suggestions

def get_suggestions(term_lemma, lemma_norm_map, top_n=3, threshold=CONF_THRESHOLD):
    """Ermittelt Vorschläge basierend auf Similarity"""
    candidates = []
    for key_lemma, entry in lemma_norm_map.items():
        if RAPIDFUZZ_AVAILABLE:
@@ -346,10 +349,14 @@ def get_suggestions(term_lemma, lemma_norm_map, top_n=3, threshold=CONF_THRESHOLD):
# Generic request with retries & caching
# =========================
def request_with_retries_generic(api_name, url, params=None, headers=None, timeout=TIMEOUT_DEFAULT, max_retries=MAX_RETRIES_DEFAULT, backoff=BACKOFF_FACTOR_DEFAULT):
    """
    Sendet GET-Requests mit Retry-Logik, Backoff und Caching
    """
    cache_key = url + (json.dumps(params, sort_keys=True, ensure_ascii=False) if params else "")
    if cache_key in CACHE:
        logger.debug(f"[Cache] {api_name}: {cache_key}")
        return CACHE[cache_key]

    retries = 0
    while retries < max_retries:
        try:
@@ -378,9 +385,10 @@ def request_with_retries_generic(api_name, url, params=None, headers=None, timeout=TIMEOUT_DEFAULT, max_retries=MAX_RETRIES_DEFAULT, backoff=BACKOFF_FACTOR_DEFAULT):
    return None

# =========================
# GND / Wikidata Batch Queries
# =========================
def batch_query_gnd(terms):
    """Batch-Abfrage der Begriffe bei GND"""
    results = {}
    if not API_ACTIVE.get("gnd", False):
        for t in terms: results[t] = ""
@@ -409,6 +417,7 @@ def batch_query_gnd(terms):
    return results

def batch_query_wikidata(terms):
    """Batch-Abfrage der Begriffe bei Wikidata"""
    results = {}
    if not API_ACTIVE.get("wikidata", False):
        for t in terms: results[t] = ""
@@ -423,10 +432,13 @@ def batch_query_wikidata(terms):
        top = ""
        try:
            if data and "search" in data:
                # Ermittlung der Kandidaten mit Ähnlichkeitsbewertung
                cands = [(e.get("label",""), SequenceMatcher(None, t.lower(), e.get("label","").lower()).ratio())
                         for e in data["search"] if e.get("label","")]
                # Filterung nach Mindestähnlichkeit (0.70)
                cands = [c for c in cands if c[1] >= 0.70]
                if cands:
                    # Bestes Ergebnis nach Ähnlichkeit auswählen
                    top = sorted(cands, key=lambda x: x[1], reverse=True)[0][0]
        except Exception as e:
            logger.debug(f"[WD] Fehler bei Verarbeitung für '{t}': {e}")
@@ -435,93 +447,14 @@ def batch_query_wikidata(terms):
    logger.info(f"[WD] Fertig. Dauer: {elapsed:.1f}s")
    return results

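# Illustrative usage of the two batch lookups above (assumed call pattern, mirroring process_files below):
#   terms = ["Wappen", "Exlibris"]
#   gnd_results = batch_query_gnd(terms)       # dict: term -> best GND label, "" if none or API inactive
#   wd_results = batch_query_wikidata(terms)   # dict: term -> best Wikidata label with similarity >= 0.70
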
# =========================
# Getty AAT Abfrage – robust & API-polite (requests)
# =========================
def batch_query_getty_aat(terms):
    results = {}
    if not API_ACTIVE.get("aat", False):
        for t in terms: results[t] = ""
        return results

    endpoint = "https://vocab.getty.edu/sparql"
    headers = {"Accept": "application/sparql-results+json", "User-Agent": HEADERS.get("User-Agent")}
    TIMEOUT = 8
    MAX_RETRIES = 3
    BACKOFF_FACTOR = 2
    FAIL_LIMIT = 5
    fail_counter_local = 0

    logger.info(f"[AAT] Starte Getty AAT-Abgleich für {len(terms)} Terme")
    start_all = time.time()
    for idx, term in enumerate(terms, start=1):
        term_norm = lemmatize_term(normalize_text(term))
        tokens = compound_split(term_norm)
        logger.debug(f"[AAT] ({idx}/{len(terms)}) Begriff '{term}' -> Tokens: {tokens}")

        query_fragments = []
        for tkn in tokens:
            t_escaped = tkn.replace('"', '\\"')
            qf = f"""
            ?concept skos:prefLabel ?label .
            FILTER(lang(?label)='de' && CONTAINS(LCASE(?label), LCASE("{t_escaped}")))
            """
            query_fragments.append(f"{{ {qf} }}")
        query_body = " UNION ".join(query_fragments) if query_fragments else ""
        query = f"PREFIX skos: <http://www.w3.org/2004/02/skos/core#> SELECT ?label ?concept WHERE {{ {query_body} }} LIMIT 10"

        retries = 0
        success = False
        start_term = time.time()
        while retries < MAX_RETRIES and not success:
            try:
                logger.debug(f"[AAT] Anfrage (Retry {retries}) für '{term}'")
                r = requests.get(endpoint, params={"query": query}, headers=headers, timeout=TIMEOUT)
                if r.status_code != 200:
                    raise ValueError(f"HTTP {r.status_code}")
                ret = r.json()
                candidates = [(b['label']['value'], b['concept']['value']) for b in ret.get("results", {}).get("bindings", [])]
                if candidates:
                    scored = [
                        (c[0], c[1], SequenceMatcher(None, term_norm, lemmatize_term(normalize_text(c[0]))).ratio())
                        for c in candidates
                    ]
                    top = max(scored, key=lambda x: x[2])
                    results[term] = top[0]
                    logger.debug(f"[AAT] Treffer für '{term}': {results[term]} (Score: {top[2]:.3f})")
                else:
                    results[term] = ""
                    logger.debug(f"[AAT] Kein Treffer für '{term}'")
                success = True
            except Exception as e:
                retries += 1
                wait = BACKOFF_FACTOR ** retries
                logger.warning(f"[AAT] Fehler ({retries}/{MAX_RETRIES}) für '{term}': {e} – warte {wait}s")
                time.sleep(wait)
                if retries == MAX_RETRIES:
                    results[term] = ""
                    fail_counter_local += 1
        # polite delay
        time.sleep(1.0)
        elapsed_term = time.time() - start_term
        logger.debug(f"[AAT] Dauer für '{term}': {elapsed_term:.2f}s")

        if fail_counter_local >= FAIL_LIMIT:
            logger.error("[AAT] Zu viele Fehler lokal - breche AAT-Abfragen ab.")
            for t_rem in terms[idx:]:
                results[t_rem] = ""
            FAIL_COUNTER["aat"] += fail_counter_local
            API_ACTIVE["aat"] = False
            break

    elapsed_all = time.time() - start_all
    logger.info(f"[AAT] Getty AAT-Abgleich abgeschlossen. Dauer: {elapsed_all:.1f}s")
    return results

# =========================
# Markierung / Export (Excel/ODS)
# =========================
def mark_norm_hits(file_path):
    """
    Markiert Treffer in Excel/ODS farblich:
    Grün = Treffer, Rot = KEIN TREFFER
    """
    ext = file_path.suffix.lower()
    try:
        if ext in [".xlsx", ".xls"]:
@@ -529,12 +462,14 @@ def mark_norm_hits(file_path):
            from openpyxl.styles import PatternFill
            wb = load_workbook(file_path)
            ws = wb.active
            # Spaltenmapping anhand der Kopfzeile
            col_map = {cell.value: idx+1 for idx, cell in enumerate(ws[1])}
            norm_col = col_map.get("Norm_Treffer", None)
            if not norm_col:
                logger.debug("Spalte 'Norm_Treffer' nicht gefunden (mark_norm_hits).")
                wb.save(file_path)
                return
            # Farben definieren
            green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
            red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
            for row in ws.iter_rows(min_row=2, min_col=norm_col, max_col=norm_col):
@@ -544,9 +479,10 @@ def mark_norm_hits(file_path):
                else:
                    cell.fill = red_fill
            wb.save(file_path)
        elif ext == ".ods":
            # ODS: kein Zell-Fill, stattdessen Status-Spalte
            df = pd.read_excel(file_path, engine="odf")
            df["Norm_Status"] = df["Norm_Treffer"].apply(lambda x: "Treffer" if pd.notna(x) and str(x).strip() and x != "KEIN TREFFER" else "Kein Treffer")
            df.to_excel(file_path, index=False, engine="odf")
    except Exception as e:
        logger.warning(f"Fehler beim Markieren der Treffer in {file_path}: {e}")
@@ -555,6 +491,9 @@ def mark_norm_hits(file_path):
# Fehlende Begriffe -> separate Datei
# =========================
def export_missing_terms(out_df, output_file):
    """
    Speichert Begriffe ohne Treffer oder Vorschläge in separater Datei
    """
    missing_df = out_df[
        (out_df["Norm_Treffer"] == "KEIN TREFFER") &
        (out_df["Norm_Vorschlag"].isna() | (out_df["Norm_Vorschlag"].str.strip() == ""))
@@ -562,7 +501,6 @@ def export_missing_terms(out_df, output_file):

    count_missing = len(missing_df)
    logger.info(f"Anzahl Begriffe ohne Treffer und Vorschläge: {count_missing}")

    if count_missing == 0:
        return

@@ -589,8 +527,10 @@ def export_missing_terms(out_df, output_file):
# Haupt-Loop: Verarbeitung Input-Dateien
# =========================
def process_files():
    """Verarbeitet alle Dateien im Input-Ordner, mappt Begriffe und speichert Ergebnisse"""
    overall_start = time.time()
    try:
        # Normvokabular laden
        norm_dict, stem_index, lemma_norm_map = load_normvokabular(NORMVOC_FILE)
    except Exception as e:
        logger.error("Normvokabular konnte nicht geladen werden. Beende.")
@@ -626,6 +566,7 @@ def process_files():
        df = df.dropna(how="all")
        df.columns = [str(c).strip() for c in df.columns]

        # Spalten identifizieren
        besch_col = next((c for c in df.columns if "Objektbeschreibung" in c), None)
        box_col = next((c for c in df.columns if "Objekt/Ebene" in c), None)
        urh_col = next((c for c in df.columns if "Urheber" in c), None)
@@ -633,6 +574,7 @@ def process_files():
            logger.warning(f"Spalte 'Objektbeschreibung' nicht gefunden in {file_path.name}. Datei übersprungen.")
            continue

        # Begriffe extrahieren
        row_terms_map = []
        for r_idx, row in enumerate(df.itertuples(index=False), start=1):
            try:
@@ -657,9 +599,11 @@ def process_files():
            if (r_idx % 200) == 0:
                logger.debug(f"[{file_path.name}] Zeile {r_idx} verarbeitet")

        # Alle einzigartigen Terme für API-Abfragen
        all_terms = list({t for _, _, terms in row_terms_map for t in terms})
        logger.info(f"[{file_path.name}] Gefundene unique Terme: {len(all_terms)}")
        total_unique_terms = len(all_terms)

        # API-Abfragen
        t0 = time.time()
        gnd_results = batch_query_gnd(all_terms)
@@ -668,9 +612,6 @@ def process_files():
        wd_results = batch_query_wikidata(all_terms)
        t2 = time.time()
        logger.info(f"[{file_path.name}] Wikidata-Abfragen Dauer: {t2-t1:.1f}s")

        # Build output rows
        output_rows = []
@@ -690,58 +631,30 @@ def process_files():
                "Norm_ID": norm_id,
                "Norm_Vorschlag": ", ".join(suggestions) if suggestions else "",
                "GND_Top1": gnd_results.get(term, ""),
                "WD_Top1": wd_results.get(term, "")
            }
            output_rows.append(out_row)
            processed_count += 1
            if (processed_count % 200) == 0:
                logger.debug(f"[{file_path.name}] {processed_count}/{total_unique_terms} Terme verarbeitet")

        # Save output
        out_df = pd.DataFrame(output_rows)
        out_file = OUTPUT_DIR / f"{file_path.stem}_mapped.xlsx"
        try:
            out_df.to_excel(out_file, index=False, engine="openpyxl")
            logger.info(f"Ergebnisse gespeichert: {out_file}")
            mark_norm_hits(out_file)
            export_missing_terms(out_df, out_file)
        except Exception as e:
            logger.error(f"Fehler beim Speichern der Ergebnisse für {file_path.name}: {e}")

    elapsed_total = time.time() - overall_start
    logger.info(f"Verarbeitung abgeschlossen. Gesamtzeit: {elapsed_total:.1f}s")
    logger.info(f"Gesamtterme: {total_terms}, Treffer: {total_hits}, Trefferquote: {total_hits/total_terms:.2%}" if total_terms else "")

    save_cache()
    logger.stop()


if __name__ == "__main__":
    process_files()

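# Illustrative end-to-end sketch of the mapping pipeline above (assumed usage, not part of the script):
#   norm_dict, stem_index, lemma_norm_map = load_normvokabular(NORMVOC_FILE)
#   name, norm_id, suggestions = map_to_norm("Wappen", norm_dict, stem_index, lemma_norm_map)
#   # name is the matched norm term or "KEIN TREFFER"; suggestions holds up to top_n fuzzy candidates
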
@@ -1,46 +0,0 @@
import subprocess
import json
import sys
from pathlib import Path

def run_mapper(term):
    """
    Ruft das bestehende mapper script auf und liefert Vorschläge zurück.
    Erwartet, dass das mapper script eine JSON-Ausgabe liefert:
    {
        "term": "Begriff",
        "norm_name": "Normierter Treffer oder KEIN TREFFER",
        "norm_id": "ID",
        "suggestions": ["Vorschlag1", "Vorschlag2", "Vorschlag3"]
    }
    """
    mapper_script = Path("/home/jarnold/projects/GND-Skript Test/NormVokabular_Mapper_1.2.py")  # dein bestehendes Mapper-Skript
    if not mapper_script.exists():
        raise FileNotFoundError(f"{mapper_script} nicht gefunden")

    # Übergabe als JSON-String
    input_json = json.dumps({"term": term})

    # Aufruf via subprocess
    result = subprocess.run(
        [sys.executable, str(mapper_script), input_json],
        capture_output=True,
        text=True
    )

    if result.returncode != 0:
        raise RuntimeError(f"Mapper Fehler: {result.stderr}")

    try:
        output = json.loads(result.stdout)
    except Exception as e:
        raise ValueError(f"Ungültige Ausgabe vom Mapper: {e}")

    return output


if __name__ == "__main__":
    if len(sys.argv) > 1:
        term = sys.argv[1]
        output = run_mapper(term)
        print(json.dumps(output, ensure_ascii=False))

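# Illustrative call of run_mapper() above (assumed; the wrapped mapper script must emit the JSON
# structure documented in the docstring):
#   result = run_mapper("Wappen")
#   print(result["norm_name"], result["suggestions"])
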
Binary file not shown.
BIN
Test API.ods
Binary file not shown.
101
Tryout/NVTest.py
@@ -1,101 +0,0 @@
import pandas as pd
import requests
import time
import os

def match_gnd(token, delay=0.3):
    """GND-Abfrage für ein Schlagwort, gibt erstes Ergebnis zurück"""
    url = f"https://lobid.org/gnd/search?q={token}&format=json"
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code == 200:
            data = resp.json()
            if 'member' in data and data['member']:
                first = data['member'][0]
                return first.get('preferredName'), first.get('gndIdentifier')
    except Exception as e:
        print(f"Fehler bei GND-Abfrage für '{token}': {e}")
    time.sleep(delay)
    return None, None

def load_exlibris_refs(path):
    """CSV einlesen, Scan-Zuordnung, Platzhalter-Inventarnummer, GND-Abgleich"""
    df = pd.read_csv(path, dtype=str, header=0)
    # erste Spalte leer? → "Kürzel"
    if df.columns[0].strip() == '':
        df.rename(columns={df.columns[0]: 'Kürzel'}, inplace=True)
    df.fillna('', inplace=True)

    # Scan-Level-Spalten
    level_cols = [c for c in df.columns if c.strip() in ['0','1','2','3','4']]

    obj_list = []
    current_obj = None
    placeholder_counter = 1

    for _, row in df.iterrows():
        has_0 = row['0'].strip() if '0' in df.columns else ''
        row_refs = []
        for c in level_cols:
            val = row[c].strip()
            if val:
                row_refs.append({'level': c, 'scan_ref': val})

        if has_0:
            if current_obj:
                obj_list.append(current_obj)
            core_data = {col: row[col] for col in df.columns if col not in level_cols}
            # Inventarnummer prüfen
            inv = core_data.get('Inventarnummer','').strip()
            if not inv:
                core_data['Inventarnummer'] = f'PL-{placeholder_counter:04d}'
                placeholder_counter += 1
            # GND-Abgleich
            obj_descr = core_data.get('Objektbeschreibung','')
            gnd_name, gnd_id = None, None
            if obj_descr:
                tokens = [t.strip() for t in obj_descr.split(',') if t.strip()]
                for t in tokens:
                    name, gid = match_gnd(t)
                    if gid:
                        gnd_name = name
                        gnd_id = gid
                        break
            core_data['GND_Name'] = gnd_name
            core_data['GND_ID'] = gnd_id
            current_obj = core_data
            current_obj['ScanReferenzen'] = row_refs
        else:
            if current_obj:
                current_obj['ScanReferenzen'].extend(row_refs)

    if current_obj:
        obj_list.append(current_obj)

    out_df = pd.DataFrame(obj_list)
    core_fields = ['Kürzel','Inventarnummer','Standort','Jahr','Urheber','Eigner',
                   'Objektbeschreibung','Material','Maße (in cm)',
                   'Objekttyp','Inschrift','Anmerkungen','ScanReferenzen',
                   'GND_Name','GND_ID']
    available = [c for c in core_fields if c in out_df.columns]
    return out_df[available]

# ====================
# Hauptteil
# ====================
if __name__ == "__main__":
    # CSV im gleichen Ordner suchen
    csv_files = [f for f in os.listdir('.') if f.lower().endswith('.csv')]
    if not csv_files:
        print("Keine CSV-Datei im aktuellen Ordner gefunden.")
        exit(1)
    # nimm die erste gefundene CSV
    input_csv = csv_files[0]
    print(f"Verwende CSV-Datei: {input_csv}")

    df = load_exlibris_refs(input_csv)

    # Ergebnis als Testergebnis.csv speichern
    output_file = "Testergebnis.csv"
    df.to_csv(output_file, index=False)
    print(f"Aufbereitete Daten gespeichert als {output_file}")

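# Illustrative call of match_gnd() above: it returns (preferredName, gndIdentifier) of the first
# lobid.org hit, or (None, None) if nothing is found or the request fails.
#   name, gnd_id = match_gnd("Exlibris")
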
190
VLG.py
@@ -1,190 +0,0 @@
#!/usr/bin/env python3
"""
VLG_AAT.py Gruppierung, Auflösung "Objektbeschreibung"
NOCH OHNE AAT-ABGLEICH

- Prüft ezodf in aktueller Umgebung
- Liest ODS aus "Input CSV/"
- Extrahiert Begriffe aus "Objektbeschreibung"
- Lemmatisierung (Spacy) + Stopwortfilter
- Subtokenisierung komplexer Phrasen
- Zählt Häufigkeiten
- Ausgabe ODS / CSV-Fallback in "Auswertung Ergebnisse"
"""

import os
import sys
import logging
from collections import Counter
import pandas as pd
import spacy

# ---------------------------
# Logging
# ---------------------------
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

# ---------------------------
# ezodf prüfen
# ---------------------------
try:
    import ezodf
    EZODF_AVAILABLE = True
    logging.info("ezodf erkannt")
except ImportError:
    EZODF_AVAILABLE = False
    logging.error("ezodf konnte nicht importiert werden!")
    logging.error("Möglicherweise nutzen Sie nicht die Python-Umgebung, in der ezodf installiert ist.")
    logging.error(f"Aktuelle Python-Executable: {sys.executable}")
    logging.error("Bitte prüfen Sie Ihre venv oder installieren Sie ezodf in dieser Umgebung:")
    logging.error("    python -m pip install ezodf")
    sys.exit(1)

# ---------------------------
# Spacy laden
# ---------------------------
try:
    nlp = spacy.load("de_core_news_sm")
    logging.info("Spacy-Modell geladen.")
except Exception as e:
    logging.error(f"Spacy-Modell konnte nicht geladen werden: {e}")
    sys.exit(1)

# ---------------------------
# Konfiguration
# ---------------------------
INPUT_FOLDER = "Input CSV"
OUTPUT_FOLDER = "Auswertung Ergebnisse"
INPUT_FILENAME = None
TARGET_COLUMN = "Objektbeschreibung"
STOPWORDS = {"mit", "auf", "von", "und", "der", "die", "das"}  # erweiterbar
MAPPING = {  # Projektinterne Sonderfälle
    "exlibris": "exlibris",
    "wappen": "wappen"
}

# ---------------------------
# Funktionen
# ---------------------------
def find_input_file(folder: str, filename_hint: str = None):
    if not os.path.isdir(folder):
        raise FileNotFoundError(f"Input-Ordner '{folder}' existiert nicht.")
    files = [f for f in os.listdir(folder) if f.lower().endswith(".ods")]
    if filename_hint:
        for f in files:
            if f == filename_hint or filename_hint in f:
                return os.path.join(folder, f)
    if not files:
        raise FileNotFoundError(f"Keine .ods-Dateien in '{folder}' gefunden.")
    return os.path.join(folder, files[0])

def read_ods_first_sheet(path: str) -> pd.DataFrame:
    """Lädt ODS, erkennt automatisch Header-Zeile."""
    try:
        df = pd.read_excel(path, engine="odf", header=None)
        logging.info("ODS mit pandas + odfpy geladen.")
    except Exception as e1:
        logging.warning(f"pandas + odfpy konnte ODS nicht lesen ({e1}).")
        if not EZODF_AVAILABLE:
            raise RuntimeError("ezodf nicht installiert und pandas + odfpy fehlgeschlagen.")
        doc = ezodf.opendoc(path)
        sheet = doc.sheets[0]
        data = []
        for row in sheet.rows():
            values = [c.value if hasattr(c, "value") else "" for c in row]
            data.append(values)
        df = pd.DataFrame(data)
        logging.info("ODS mit ezodf geladen.")

    # Header-Zeile automatisch finden
    header_row_index = None
    for i, row in df.iterrows():
        row_str = row.fillna("").astype(str).str.lower()
        if any("objektbeschreibung" in str(cell) for cell in row_str):
            header_row_index = i
            break
    if header_row_index is None:
        raise KeyError("Keine Header-Zeile mit 'Objektbeschreibung' gefunden.")

    df.columns = df.iloc[header_row_index]
    df = df.iloc[header_row_index + 1:].reset_index(drop=True)
    return df

def tokenize_and_lemmatize(series: pd.Series) -> list:
    """Tokenisiert, entfernt Stopwords, wendet Mapping + Spacy-Lemmatisierung an."""
    series = series.fillna("").astype(str).str.strip().str.lower()
    all_terms = []
    for text in series:
        if not text:
            continue
        # Komma-Split
        for part in [p.strip() for p in text.split(",") if p.strip()]:
            # Subtokenisierung via Spacy
            doc = nlp(part)
            for token in doc:
                lemma = token.lemma_.lower()
                if lemma in STOPWORDS:
                    continue
                lemma = MAPPING.get(lemma, lemma)
                if lemma:
                    all_terms.append(lemma)
    return all_terms

def write_output(rows: list, outpath: str):
    if EZODF_AVAILABLE:
        if not rows:
            logging.warning("Keine Daten zum Schreiben.")
            return
        keys = list(rows[0].keys())
        doc = ezodf.newdoc(doctype="ods", filename=outpath)
        sheet = ezodf.Sheet("Auswertung", size=(len(rows)+1, len(keys)))
        doc.sheets += sheet
        for ci, k in enumerate(keys):
            sheet[0, ci].set_value(k)
        for ri, row in enumerate(rows, start=1):
            for ci, k in enumerate(keys):
                sheet[ri, ci].set_value(row.get(k, ""))
        doc.save()
        logging.info(f"ODS geschrieben: {outpath}")
    else:
        csv_path = os.path.splitext(outpath)[0] + ".csv"
        df = pd.DataFrame(rows)
        df.to_csv(csv_path, index=False, sep=";", encoding="utf-8")
        logging.info(f"CSV-Fallback geschrieben: {csv_path}")

# ---------------------------
# Hauptfunktion
# ---------------------------
def main(input_folder=INPUT_FOLDER, input_filename=INPUT_FILENAME):
    input_path = find_input_file(input_folder, filename_hint=input_filename)
    input_basename = os.path.splitext(os.path.basename(input_path))[0]
    logging.info(f"Verarbeite Datei: {input_path}")

    df = read_ods_first_sheet(input_path)
    logging.info(f"Geladene Spalten: {list(df.columns)}")

    if TARGET_COLUMN.lower() not in [str(c).lower() for c in df.columns]:
        raise KeyError(f"Spalte '{TARGET_COLUMN}' nicht gefunden.")

    terms = tokenize_and_lemmatize(df[TARGET_COLUMN])
    logging.info(f"Gefundene Begriffe: {len(terms)}")

    counts = Counter(terms)
    sorted_terms = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)
    rows = [{"Begriff": term, "Anzahl": freq} for term, freq in sorted_terms]

    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    out_name = f"{input_basename} Auswertung.ods"
    out_path = os.path.join(OUTPUT_FOLDER, out_name)
    write_output(rows, out_path)
    logging.info("Fertig.")

if __name__ == "__main__":
    argv = sys.argv[1:]
    folder = INPUT_FOLDER
    fname = INPUT_FILENAME
    if len(argv) >= 1:
        folder = argv[0]
    if len(argv) >= 2:
        fname = argv[1]
    main(input_folder=folder, input_filename=fname)

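# Illustrative use of tokenize_and_lemmatize() above (assumed input/output; the exact lemmas depend
# on the spaCy model):
#   tokenize_and_lemmatize(pd.Series(["Wappen, Exlibris mit Inschrift"]))
#   # -> roughly ["wappen", "exlibris", "inschrift"]  ("mit" is dropped as a stopword)
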
262
VLG_API_multi.py
@@ -1,262 +0,0 @@
import os
import sys
import time
import json
import requests
import pandas as pd
from pathlib import Path
from difflib import SequenceMatcher
import argparse

# =========================
# Argumente / Dry-Run
# =========================
parser = argparse.ArgumentParser()
parser.add_argument('--dry-run', action='store_true', help='API-Abfragen simulieren')
args = parser.parse_args()
DRY_RUN = args.dry_run

# =========================
# Konfiguration
# =========================
INPUT_DIR = Path("Input CSV")
OUTPUT_DIR = Path("Auswertung Ergebnisse")
OUTPUT_DIR.mkdir(exist_ok=True)

TIMEOUT = 5
MAX_RETRIES = 3
BACKOFF_FACTOR = 2
MAX_CONSECUTIVE_FAILURES = 10

CACHE_FILE = "api_cache.json"
if os.path.exists(CACHE_FILE):
    with open(CACHE_FILE, "r", encoding="utf-8") as f:
        CACHE = json.load(f)
else:
    CACHE = {}

API_ACTIVE = {"gnd": True, "wikidata": True}
FAIL_COUNTER = {"gnd": 0, "wikidata": 0}

HEADERS = {"User-Agent": "VLG_API_multi/1.0 (projekt@example.com)"}

# =========================
# Logging
# =========================
def log(level, msg):
    print(f"[{level}] {msg}")

# =========================
# Cache speichern
# =========================
def save_cache():
    with open(CACHE_FILE, "w", encoding="utf-8") as f:
        json.dump(CACHE, f, indent=2, ensure_ascii=False)

# =========================
# Request mit Retry & Backoff
# =========================
def request_with_retries(api_name, url, params=None):
    if DRY_RUN:
        return {"dummy": True}
    if not API_ACTIVE[api_name]:
        return None

    cache_key = url + (str(params) if params else "")
    if cache_key in CACHE:
        return CACHE[cache_key]

    retries = 0
    while retries < MAX_RETRIES:
        try:
            r = requests.get(url, params=params, timeout=TIMEOUT, headers=HEADERS)
            if r.status_code == 200:
                try:
                    data = r.json()
                except:
                    data = r.text
                CACHE[cache_key] = data
                save_cache()
                FAIL_COUNTER[api_name] = 0
                return data
            elif r.status_code in [403, 429]:
                log("ERROR", f"{api_name.upper()} HTTP {r.status_code} – Stopschalter aktiviert")
                API_ACTIVE[api_name] = False
                return None
            else:
                log("ERROR", f"{api_name.upper()} HTTP {r.status_code}")
        except requests.exceptions.Timeout:
            log("ERROR", f"Timeout bei {api_name.upper()}")
        except Exception as e:
            log("ERROR", f"Fehler bei {api_name.upper()}: {e}")

        retries += 1
        sleep_time = min(BACKOFF_FACTOR ** retries, 30)
        time.sleep(sleep_time)

    FAIL_COUNTER[api_name] += 1
    if FAIL_COUNTER[api_name] >= MAX_CONSECUTIVE_FAILURES:
        log("CRITICAL", f"{MAX_CONSECUTIVE_FAILURES} Fehler bei {api_name.upper()} – Stopschalter aktiviert")
        API_ACTIVE[api_name] = False
    return None

# =========================
# API-Abfragen mit Confidence
# =========================
def query_gnd(term, min_conf=0.6):
    if DRY_RUN or not API_ACTIVE["gnd"]:
        return "TEST_GND", 1.0

    url = f"https://lobid.org/gnd/search?q={term}&format=json"
    data = request_with_retries("gnd", url)
    if not data:
        return "API nicht erreichbar", 0.0

    results = []
    scores = []
    for doc in data.get("member", []):
        name = doc.get("preferredName", "")
        conf = SequenceMatcher(None, term.lower(), name.lower()).ratio()
        if conf >= min_conf:
            results.append(name)
            scores.append(conf)
    if results:
        return ", ".join(results), max(scores)
    return "ohne Ergebnis", 0.0

def query_wikidata(term, min_conf=0.5):
    if DRY_RUN or not API_ACTIVE["wikidata"]:
        return "TEST_WD", 1.0

    url = "https://www.wikidata.org/w/api.php"
    params = {"action": "wbsearchentities", "search": term, "language": "de", "format": "json"}
    data = request_with_retries("wikidata", url, params)
    if not data:
        return "API nicht erreichbar", 0.0

    results = []
    scores = []
    for entry in data.get("search", []):
        match_info = entry.get("match", {})
        score = match_info.get("score", 0.0)
        if score >= min_conf:
            results.append(entry["label"])
            scores.append(score)
    if results:
        return ", ".join(results), max(scores)
    return "ohne Ergebnis", 0.0

# =========================
# Input laden
# =========================
def load_input_file(file_path):
    try:
        if file_path.suffix.lower() == ".ods":
            df = pd.read_excel(file_path, engine="odf", header=None)
        elif file_path.suffix.lower() == ".xlsx":
            df = pd.read_excel(file_path, engine="openpyxl", header=None)
        elif file_path.suffix.lower() == ".csv":
            df = pd.read_csv(file_path, header=None)
        else:
            log("WARNING", f"Unbekanntes Dateiformat: {file_path.name}")
            return None
        return df
    except Exception as e:
        log("ERROR", f"Fehler beim Laden von {file_path.name}: {e}")
        return None

# =========================
# Header-Zeile suchen
# =========================
def find_header_row(df, keywords=("objektbeschreibung", "objekt/ebene")):
    for i, row in df.iterrows():
        row_lower = [str(cell).lower() if pd.notna(cell) else "" for cell in row]
        if any(kw in cell for kw in keywords for cell in row_lower):
            return i, row_lower
    return None, None

# =========================
# Verarbeitung
# =========================
def process_files():
    all_terms = []
    output_rows = []

    for file_path in INPUT_DIR.glob("*"):
        if not file_path.suffix.lower() in [".csv", ".xlsx", ".ods"]:
            continue
        log("INFO", f"Verarbeite {file_path.name}")
        df = load_input_file(file_path)
        if df is None:
            continue

        header_idx, header_row = find_header_row(df)
        if header_idx is None:
            log("WARNING", f"Keine Header-Zeile gefunden in {file_path.name}")
            continue
        df.columns = header_row
        df = df.iloc[header_idx + 1:].reset_index(drop=True)

        col_objdesc = next((col for col in df.columns if "objektbeschreibung" in str(col).lower()), None)
        col_objlevel = next((col for col in df.columns if "objekt/ebene" in str(col).lower()), None)
        if not col_objdesc:
            log("WARNING", f"Keine Spalte 'Objektbeschreibung' in {file_path.name}")
            continue

        term_list = []
        obj_level_list = []
        for _, row in df.iterrows():
            terms = str(row[col_objdesc]) if pd.notna(row[col_objdesc]) else ""
            if not terms:
                continue
            for term in [t.strip() for t in terms.split(",") if t.strip()]:
                term_list.append(term)
                obj_level_list.append(row[col_objlevel] if col_objlevel and pd.notna(row[col_objlevel]) else "")

        # API-Abfragen
        gnd_results = []
        gnd_scores = []
        wikidata_results = []
        wikidata_scores = []

        for term in term_list:
            gnd_res, gnd_conf = query_gnd(term)
            wikidata_res, wd_conf = query_wikidata(term)
            gnd_results.append(gnd_res)
            gnd_scores.append(gnd_conf)
            wikidata_results.append(wikidata_res)
            wikidata_scores.append(wd_conf)

        for idx, term in enumerate(term_list):
            output_rows.append({
                "Begriff": term,
                "Quelle": file_path.name,
                "Objekt/Ebene": obj_level_list[idx],
                "GND": gnd_results[idx],
                "GND_Confidence": gnd_scores[idx],
                "Wikidata": wikidata_results[idx],
                "Wikidata_Confidence": wikidata_scores[idx]
            })
        all_terms.extend(term_list)

    # Hauptoutput
    out_df = pd.DataFrame(output_rows)
    out_file = OUTPUT_DIR / "Auswertung_gesamt.ods"
    out_df.to_excel(out_file, index=False, engine="odf")
    log("INFO", f"Hauptauswertung gespeichert: {out_file}")

    # Rohdatei
    raw_terms = pd.Series(all_terms).value_counts().reset_index()
    raw_terms.columns = ["Begriff", "Häufigkeit"]
    raw_file = OUTPUT_DIR / "Rohbegriffe.ods"
    raw_terms.to_excel(raw_file, index=False, engine="odf")
    log("INFO", f"Rohbegriffe gespeichert: {raw_file}")

# =========================
# Main
# =========================
if __name__ == "__main__":
    if not INPUT_DIR.exists():
        log("CRITICAL", f"Eingabeordner {INPUT_DIR} fehlt!")
        sys.exit(1)
    process_files()
2
Vorschlag_Ersetzen/Vorschlag_Ersetzen_Makro
Normal file
2
Vorschlag_Ersetzen/Vorschlag_Ersetzen_Makro
Normal file
@ -0,0 +1,2 @@
Macro for the capture table (Erfassungstabelle) that lets term suggestions be accepted and substituted with a single click.
Does not work in LibreOffice, but it should run in Excel.
@ -0,0 +1 @@
,jarnold,workPC,16.10.2025 13:04,file:///home/jarnold/.config/libreoffice/4;
Binary file not shown.
BIN
Wiki_Anleitungen/Anleitung_Normvokabular_Abgleich_Makro.odt
Normal file
BIN
Wiki_Anleitungen/Anleitung_Normvokabular_Abgleich_Makro.odt
Normal file
Binary file not shown.
@ -0,0 +1,125 @@
= User Guide - NV_MASTER Matching Macro =
'''(mapper_macro_2.x.py)'''

== 1. What the macro does ==
This macro helps you standardise the terms in the evaluation table.
It automatically compares the contents of the "Objektbeschreibung" column with a
controlled-vocabulary reference file called "NV_MASTER.ods".
That way you can see which terms are already standardised, where suitable suggestions exist,
and where nothing was recognised at all.
The macro colour-codes every row of the evaluation table under "Objektbeschreibung":

* <span style="color:green;">Green</span>: everything matches, all terms were found
* <span style="color:yellow;">Yellow</span>: some terms were recognised, others were not
* <span style="color:red;">Red</span>: not a single term was recognised

Example:

{| class="wikitable"
|+ Table 1
|-
! Objektbeschreibung !! Norm_Treffer !! Norm_Vorschlag !! Kein_Treffer
|-
| (empty) || || ||
|}

The macro creates the columns "Norm_Treffer", "Norm_Vorschlag" and "Kein_Treffer"
automatically if they are missing.
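
Conceptually, every term from "Objektbeschreibung" ends up in exactly one of these three columns. The following is only a minimal sketch of that classification idea, not the macro's actual code; the function name, the 0.8 threshold and the {term: id} dictionary are illustrative assumptions:

<pre>
# Sketch only: classify one term against a controlled vocabulary given as {term: id}.
from difflib import SequenceMatcher

def classify(term, vocab, threshold=0.8):
    key = term.strip().lower()
    if key in vocab:
        # exact hit in the controlled vocabulary
        return "Norm_Treffer", f"{term} ({vocab[key]})"
    # otherwise look for the most similar vocabulary entry
    best, best_score = None, 0.0
    for v in vocab:
        score = SequenceMatcher(None, key, v).ratio()
        if score > best_score:
            best, best_score = v, score
    if best is not None and best_score >= threshold:
        return "Norm_Vorschlag", f"{best} ({vocab[best]})"
    return "Kein_Treffer", term
</pre>

With a vocabulary like {"harfe": "2.1"}, classify("Harfe", vocab) yields ("Norm_Treffer", "Harfe (2.1)"), while a near-miss such as "Harfen" lands in Norm_Vorschlag.
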
'''Tips for use:'''
* If you change the NV_MASTER file, just run the macro again; it re-reads the file
on every run.
* Make a backup of the evaluation table before you run the macro.
* Check the log file from time to time to make sure everything is running correctly.
* If a term is marked red but in your view is sensible and accurate
for the object being described, write the term down and discuss with your
supervisors whether it should be added to the controlled vocabulary.

== 2. Where the macro files must be located ==
'''On Linux:'''
<pre>
/home/<your-username>/.config/libreoffice/4/user/Scripts/python/Vokabular_Abgleich_Makro/
</pre>

'''On Windows:'''
<pre>
C:\Users\<your-username>\AppData\Roaming\LibreOffice\4\user\Scripts\python\Vokabular_Abgleich_Makro\
</pre>

This folder must contain:
* mapper_macro_2.x.py (the macro)
* NV_MASTER.ods (the reference file)
* optional: mapper_macro_2.x.log (created automatically)

== 3. How to start the macro ==
# Open your Calc file containing the terms
# In the menu, go to Tools → Macros → Run Macro...
# Select: My Macros → mapper_macro_2.x.py → run_mapper_macro
# Click Run

The macro starts immediately. Depending on the size of the table, the comparison takes a few seconds to a few minutes.
If nothing happens, an error has occurred. In that case check the .log file that the macro writes on every run; it is located in the same folder as the macro.

== 4. How to read the result ==
After the run, the macro writes the hits and suggestions directly into your table and marks them:

{| class="wikitable"
|+ Table 2
|-
! Objektbeschreibung !! Norm_Treffer !! Norm_Vorschlag !! Kein_Treffer
|-
| Harfe, Noten, Bäume, Geldbeutel, Landschaft, Gewässer || Harfe (2.1), Noten (3.4), Landschaft (7.2), Gewässer (9.1) || Baum || Geldbeutel
|}

Colours:
* 🟩 <span style="color:green;">Green</span>: all terms were recognised directly → perfect!
* 🟨 <span style="color:yellow;">Yellow</span>: some terms were recognised, others only partially or not at all → check the suggestions in the "Norm_Vorschlag" column
* 🟥 <span style="color:red;">Red</span>: no term was found → adjust the object description and, if appropriate, add new terms to the controlled vocabulary
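
The colour markings themselves are set through LibreOffice's UNO API. A minimal sketch of how a single cell can be coloured from a Python macro follows; the colour values, status labels and helper name are illustrative assumptions, not the macro's actual code:

<pre>
# Sketch only: colour one cell of the active Calc sheet from a LibreOffice Python macro.
GREEN, YELLOW, RED = 0x00CC00, 0xFFFF66, 0xFF6666

def mark_cell(row_index, col_index, status):
    doc = XSCRIPTCONTEXT.getDocument()            # available inside the macro context
    sheet = doc.CurrentController.ActiveSheet     # currently active sheet
    cell = sheet.getCellByPosition(col_index, row_index)
    cell.CellBackColor = {"Treffer": GREEN, "Vorschlag": YELLOW, "Kein_Treffer": RED}[status]
</pre>

CellBackColor expects an RGB integer, which is why the colours are written as hex values.
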
== 5. Where the log file is located ==
The macro writes everything that happens to a log file:

'''Linux:''' /home/<your-username>/.config/libreoffice/4/user/Scripts/python/Vokabular_Abgleich_Makro/mapper_macro_2.x.log
'''Windows:''' C:\Users\<your-username>\AppData\Roaming\LibreOffice\4\user\Scripts\python\Vokabular_Abgleich_Makro\mapper_macro_2.x.log

There you can see:
* when the macro was started
* how many rows were processed
* and whether any errors occurred
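
A few example lines (purely illustrative; the exact wording depends on the macro version):

<pre>
[2025-10-16 13:04:12] [INFO] Run started, NV_MASTER.ods loaded
[2025-10-16 13:04:15] [INFO] 480 rows processed
[2025-10-16 13:04:15] [ERROR] Column 'Objektbeschreibung' not found in sheet 2
</pre>
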
Typical problems and how to solve them:

{| class="wikitable"
|+ Table 3
|-
! Problem !! Cause !! Solution
|-
| The macro does not show up || Wrong location || Check that the script really is in the Scripts/python folder
|-
| Error message "Module not found" || Python libraries are missing || Install pandas, odfpy, spacy, rapidfuzz
|-
| NV_MASTER is not read || File is missing or corrupted || Check the file name and location
|-
| LibreOffice crashes || Very large file or a faulty NV_MASTER || Test with a smaller file or a fresh NV_MASTER
|}
== 6. What the macro needs in order to run properly ==
All of the following packages are required by the macro, whether it runs in LibreOffice or in Excel:

{| class="wikitable"
|+ Table 4
|-
! Package !! Purpose
|-
| pandas || Reads the reference file (NV_MASTER.ods)
|-
| odfpy || Enables reading .ods files (for pandas.read_excel(..., engine="odf"))
|-
| spacy || Lemmatisation (optional, but recommended)
|-
| rapidfuzz || Fast fuzzy matching (alternative to difflib)
|-
| openpyxl || Needed if .xlsx files are used
|-
| python-dateutil || Pulled in automatically by pandas
|}
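
If "Module not found" errors occur (see Table 3), the packages can be installed with pip, assuming the Python interpreter that LibreOffice uses has pip available; the German spaCy model used for lemmatisation is downloaded separately (de_core_news_sm is the standard small German model):

<pre>
pip install pandas odfpy spacy rapidfuzz openpyxl python-dateutil
python -m spacy download de_core_news_sm
</pre>
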
BIN
Wiki_Anleitungen/DigitalisierungWorkflow151025.drawio.pdf
Normal file
BIN
Wiki_Anleitungen/DigitalisierungWorkflow151025.drawio.pdf
Normal file
Binary file not shown.
622
Wiki_Anleitungen/Digitalisierung_Workflow_141025.drawio
Normal file
622
Wiki_Anleitungen/Digitalisierung_Workflow_141025.drawio
Normal file
@ -0,0 +1,622 @@
|
|||||||
|
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0" version="28.2.5">
|
||||||
|
<diagram name="Page-1" id="aLmyRVYCle99qeRE2JvP">
|
||||||
|
<mxGraphModel dx="1301" dy="1900" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
|
||||||
|
<root>
|
||||||
|
<mxCell id="0" />
|
||||||
|
<mxCell id="1" parent="0" />
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-3" value="" style="group" parent="1" vertex="1" connectable="0">
|
||||||
|
<mxGeometry x="85" y="932" width="310" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-2" value="Scan- und Erfassungsprozess" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" parent="WA2_J1DCvVjPXciXSW-M-3" vertex="1">
|
||||||
|
<mxGeometry x="60" y="-900" width="210" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-13" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-4" target="WA2_J1DCvVjPXciXSW-M-6" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-14" value="Makro gibt Vorschläge aus NV zurück" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="WA2_J1DCvVjPXciXSW-M-13" vertex="1" connectable="0">
|
||||||
|
<mxGeometry x="0.2678" y="-1" relative="1" as="geometry">
|
||||||
|
<mxPoint as="offset" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-4" value="<div>Makro (mapper_macro_2.x.py)</div>" style="ellipse;whiteSpace=wrap;html=1;fillColor=#FFFF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="575" y="52" width="200" height="100" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.367;exitY=0.988;exitDx=0;exitDy=0;exitPerimeter=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-1" target="WA2_J1DCvVjPXciXSW-M-4" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="405" y="97" as="sourcePoint" />
|
||||||
|
<Array as="points">
|
||||||
|
<mxPoint x="235" y="91" />
|
||||||
|
<mxPoint x="235" y="117" />
|
||||||
|
</Array>
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-12" value="Wird vom Makro gelesen und mit NV abgeglichen" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="WA2_J1DCvVjPXciXSW-M-5" vertex="1" connectable="0">
|
||||||
|
<mxGeometry x="0.0228" y="4" relative="1" as="geometry">
|
||||||
|
<mxPoint as="offset" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-6" value="Anpassung der Erfassungstabelle anhand der Vorschläge" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#B3FF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="80" y="212" width="320" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-10" value="" style="endArrow=none;dashed=1;html=1;rounded=0;" parent="1" edge="1">
|
||||||
|
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||||
|
<mxPoint x="515" y="972" as="sourcePoint" />
|
||||||
|
<mxPoint x="515" y="12" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-11" value="<h1 style="margin-top: 0px;">Workflow Digitalisierung -</h1><h1 style="margin-top: 0px;"><u><font style="font-size: 20px;">Objekterfassung und Pflege des Normvokabulars</font></u></h1><div><font style="font-size: 14px;">- Erfassung und Verschlagwortung von Bildobjekten</font></div><div><font style="font-size: 14px;">- Abgleich mit internem Normvokabular</font></div><div><font style="font-size: 14px;">- API-Abgleich mit getty und GND</font></div><div><font style="font-size: 14px;">- Pflege und Erweiterung des Normvokabulars</font></div>" style="text;html=1;whiteSpace=wrap;overflow=hidden;rounded=0;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="30" y="-1070" width="455" height="220" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-1" value="<div>Scan und Erfassen der Objekte, Erfassung in Tabellen, Spalte "Objektbeschreibung"</div>" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#B3FF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="85" y="32" width="310" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-16" value="Makro 2 (Übernahme von Vorschlägen aus NV per Klick)" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#FF6666;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="575" y="292" width="190" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-20" value="" style="html=1;shadow=0;dashed=0;align=center;verticalAlign=middle;shape=mxgraph.arrows2.arrow;dy=0.6;dx=40;notch=0;fillColor=#FF6666;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="460" y="312" width="90" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-23" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.071;entryY=0.25;entryDx=0;entryDy=0;entryPerimeter=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;dashed=1;" parent="1" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="592.495" y="462" as="targetPoint" />
|
||||||
|
<mxPoint x="232.5700000000001" y="432" as="sourcePoint" />
|
||||||
|
<Array as="points">
|
||||||
|
<mxPoint x="233" y="462" />
|
||||||
|
</Array>
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-34" value="Gleiche Funktion wie Makro 1 + API-Abgleich" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="WA2_J1DCvVjPXciXSW-M-23" vertex="1" connectable="0">
|
||||||
|
<mxGeometry x="-0.4298" relative="1" as="geometry">
|
||||||
|
<mxPoint x="53" as="offset" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-15" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.25;exitY=1;exitDx=0;exitDy=0;" edge="1" parent="1" source="WA2_J1DCvVjPXciXSW-M-21">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="165.20000000000005" y="510" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-21" value="Bereinigte Erfassungstabelle" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#99CCFF;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="85" y="362" width="320" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-37" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.452;entryY=-0.007;entryDx=0;entryDy=0;entryPerimeter=0;dashed=1;" parent="1" source="WA2_J1DCvVjPXciXSW-M-24" target="WA2_J1DCvVjPXciXSW-M-33" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<Array as="points">
|
||||||
|
<mxPoint x="232" y="480" />
|
||||||
|
</Array>
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-60" value="gibt aus" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="WA2_J1DCvVjPXciXSW-M-37" vertex="1" connectable="0">
|
||||||
|
<mxGeometry x="-0.0997" y="-1" relative="1" as="geometry">
|
||||||
|
<mxPoint as="offset" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-24" value="NormVokabular_Mapper.py" style="ellipse;whiteSpace=wrap;html=1;fillColor=#FFFF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="592.5" y="432" width="175" height="80" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-27" value="<u><b>WHK/Manuell</b></u>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="145" width="100" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-28" value="<b><u>Programm/automatisiert</u></b>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="590" width="160" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-29" value="<div>Mögliche Optimierung, funktioniert aber nicht in LO</div>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;fontSize=8;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="570" y="362" width="200" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-32" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.484;entryY=0;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-6" target="WA2_J1DCvVjPXciXSW-M-21" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-17" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="1" source="WA2_J1DCvVjPXciXSW-M-33">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="247.5" y="720" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-33" value="Bereinigte Erfassungstabelle" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#99CCFF;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="85" y="512" width="325" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-35" value="<ul><li>Liest Spalte Objektbeschreibung aus, filtert Stopwords und Zahlen raus</li><li>Normalisierung, Lemmatisierung, Stemming der Wörter für höhere Trefferwahrscheinlichkeit</li><li>Liest das Normvokabular, Berücksichtigt ID-Hierarchie, erstellt Index für gestemmte Begriffe</li><li>Abgleich mit Normvokabular, generiert Vorschläge wenn kein Treffer vorliegt</li><li>API-Abgleich (aktuell GND und wikidata, Top1-Treffer)</li><li>Erstellt eine Auswertungsdatei, markiert Begriffe entsprechend ihres Status)</li></ul>" style="text;strokeColor=none;fillColor=none;html=1;whiteSpace=wrap;verticalAlign=middle;overflow=hidden;fontSize=10;align=left;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="520" y="532" width="300" height="160" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-93" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-39" target="WA2_J1DCvVjPXciXSW-M-45" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-39" value="Aufnahme ins Normvokabular oder Verwerfen des Begriffs" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#B3FF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="85" y="722" width="330" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-43" value="" style="endArrow=none;dashed=1;html=1;rounded=0;" parent="1" edge="1">
|
||||||
|
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||||
|
<mxPoint x="515" y="1192" as="sourcePoint" />
|
||||||
|
<mxPoint x="515" y="962" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-94" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;dashed=1;" parent="1" source="WA2_J1DCvVjPXciXSW-M-45" target="WA2_J1DCvVjPXciXSW-M-46" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="615" y="832" as="targetPoint" />
|
||||||
|
<Array as="points">
|
||||||
|
<mxPoint x="475" y="822" />
|
||||||
|
<mxPoint x="475" y="822" />
|
||||||
|
</Array>
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.25;exitY=1;exitDx=0;exitDy=0;" edge="1" parent="1" source="WA2_J1DCvVjPXciXSW-M-45">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="167.66666666666674" y="980" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-45" value="Manuelle Anpassung der Normvokabular-Masterfile" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#99CCFF;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="85" y="802" width="330" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-92" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;dashed=1;" parent="1" source="WA2_J1DCvVjPXciXSW-M-46" target="WA2_J1DCvVjPXciXSW-M-52" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<Array as="points">
|
||||||
|
<mxPoint x="695" y="912" />
|
||||||
|
<mxPoint x="198" y="912" />
|
||||||
|
</Array>
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-220" value="gibt aus" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="WA2_J1DCvVjPXciXSW-M-92" vertex="1" connectable="0">
|
||||||
|
<mxGeometry x="0.3024" y="-2" relative="1" as="geometry">
|
||||||
|
<mxPoint as="offset" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-46" value="Masterfile_Editor.py" style="ellipse;whiteSpace=wrap;html=1;fillColor=#FFFF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="635" y="782" width="120" height="80" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-49" value="liest und bereinigt Normvokabular" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="427.5" y="817" width="200" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-58" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-52" target="WA2_J1DCvVjPXciXSW-M-57" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-221" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="WA2_J1DCvVjPXciXSW-M-52" target="WA2_J1DCvVjPXciXSW-M-57" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-222" value="=" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="WA2_J1DCvVjPXciXSW-M-221" vertex="1" connectable="0">
|
||||||
|
<mxGeometry x="-0.3079" y="1" relative="1" as="geometry">
|
||||||
|
<mxPoint as="offset" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-52" value="Aktualisierte Masterfile, mit allen Änderungen und in der richtigen Struktur" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#99CCFF;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="85" y="980" width="225" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-59" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-57" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="70" y="242" as="targetPoint" />
|
||||||
|
<Array as="points">
|
||||||
|
<mxPoint x="40" y="1130" />
|
||||||
|
<mxPoint x="40" y="242" />
|
||||||
|
</Array>
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-57" value="Masterfile Normvokabular Updated" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#99CCFF;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="117.5" y="1100" width="160" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-64" value="" style="html=1;shadow=0;dashed=0;align=center;verticalAlign=middle;shape=mxgraph.arrows2.arrow;dy=0.6;dx=40;notch=0;fillColor=#FF6666;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="410" y="1107.5" width="90" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-200" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-65" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="40" y="1140" as="targetPoint" />
|
||||||
|
<Array as="points">
|
||||||
|
<mxPoint x="680" y="1180" />
|
||||||
|
<mxPoint x="40" y="1180" />
|
||||||
|
</Array>
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-65" value="<div>Normvokabular-Masterfile muss&nbsp;</div><div><b>zentral</b> als <b>SPOT</b> vorliegen und gepflegt werden können</div>" style="ellipse;whiteSpace=wrap;html=1;fillColor=#FF6666;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="575" y="1075" width="210" height="85" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-66" value="" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#99CCFF;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="485" y="-1046" width="20" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-67" value="" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#FF6666;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="485" y="-1006" width="20" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-68" value="" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#B3FF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="485" y="-966" width="20" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-69" value="" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#FFFF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="485" y="-926" width="20" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-70" value="<b>Datei</b>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="518" y="-1050" width="50" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-71" value="<b>Fehlender Schritt/Optimierungsmöglichkeit</b>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="510" y="-1011" width="270" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-72" value="<b>Vorgang, WHK</b>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="515" y="-971" width="110" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-73" value="<b>Programm</b>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="520" y="-931" width="80" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-74" value="" style="endArrow=none;html=1;rounded=0;" parent="1" edge="1">
|
||||||
|
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||||
|
<mxPoint x="35" y="-850" as="sourcePoint" />
|
||||||
|
<mxPoint x="805" y="-850" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-75" value="<div align="left"><font style="font-size: 13px;"><b><u><font>Probleme/Noch zu klären:</font></u></b></font><ul><li><font style="font-size: 13px;"><b>Makro 1 und NormVokabular-Mapper</b> redundant, eine Methode festlegen (Makro benutzerfreundlicher, Treffer/Vorschläge direkt in Erfassung sichtbar, Mapper genauer, API-Abgleich, Auswertungsdatei übersichtlicher)</font></li><li><font style="font-size: 13px;"><b>Makro 2</b> (Vorschläge aus Normvokabular können automatisch per Klick in die Erfassungstabelle übernommen werden)</font></li><li><font style="font-size: 13px;"><b>Normvokabular</b>: Eine zentrale .json als SPOT etablieren und zentral in alle Prozesse einbinden</font></li><li><font style="font-size: 13px;"><b>Mapper</b>&nbsp;oder <b>Makro</b> benötigt Funktion, Wörter ohne Treffer und Vorschlag in <br>eigene Liste zu übernehmen und auszugeben -&gt; manuelle Prüfung</font></li><li><font style="font-size: 13px;"><b>Normvokabular</b>: Regeln, ID-Struktur, Kategorien müssen auf Qualität und Nutzbarkeit geprüft werden; danach Anpassung aller Programme, die sich auf Normvokabular stützen</font></li></ul><font style="font-size: 13px;"><br></font></div>" style="rounded=0;whiteSpace=wrap;html=1;align=left;spacing=2;spacingRight=0;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="40" y="1232" width="770" height="190" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-80" value="<ul><li>Liest Spalte Objektbeschreibung aus, filtert Stopwords und Zahlen raus</li><li>Normalisierung, Lemmatisierung, Stemming der Wörter für höhere Trefferwahrscheinlichkeit</li><li>Liest das Normvokabular, Berücksichtigt ID-Hierarchie, erstellt Index für gestemmte Begriffe, cache und log</li><li>Abgleich mit Normvokabular, generiert Vorschläge wenn kein Treffer vorliegt</li><li>Markiert Treffer, Vorschläge und Keine Treffer</li></ul>" style="text;strokeColor=none;fillColor=none;html=1;whiteSpace=wrap;verticalAlign=middle;overflow=hidden;fontSize=10;align=left;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="525" y="132" width="300" height="160" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-81" value="" style="endArrow=none;dashed=1;html=1;dashPattern=1 3;strokeWidth=2;rounded=0;" parent="1" edge="1">
|
||||||
|
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||||
|
<mxPoint x="245" y="322" as="sourcePoint" />
|
||||||
|
<mxPoint x="455" y="322" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-83" value="" style="endArrow=none;dashed=1;html=1;dashPattern=1 3;strokeWidth=2;rounded=0;entryX=0.055;entryY=0.48;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" target="WA2_J1DCvVjPXciXSW-M-64" edge="1">
|
||||||
|
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||||
|
<mxPoint x="281" y="1117" as="sourcePoint" />
|
||||||
|
<mxPoint x="365" y="1002" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-91" value="<ul><li>Automatische Spaltenerkennung (ID, Name/Wort/Vokabel)</li><li>Aufbau einer hierarchischen Struktur (Ober-, Unter-, Unterunterkategorien)</li><li>Erstellung eines Mastersheets mit eindeutigen IDs</li><li>Sortierte Ausgabe nach vordefinierter Sheet-Reihenfolge</li><li>Protokollierung im Terminal (Zeilenanzahl, Warnungen, ID-Zählung)</li><li>Speicherung einer neuen, synchronisierten Output-Datei ohne Änderung der Originaldatei</li></ul>" style="text;strokeColor=none;fillColor=none;html=1;whiteSpace=wrap;verticalAlign=middle;overflow=hidden;fontSize=10;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="510" y="902" width="310" height="160" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-96" value="" style="endArrow=none;html=1;rounded=0;" parent="1" edge="1">
|
||||||
|
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||||
|
<mxPoint x="15" y="1460" as="sourcePoint" />
|
||||||
|
<mxPoint x="815" y="1460" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-192" value="<font style="font-size: 19px;"><b><u>3. Aktuelle Struktur des Normvokabulars (Stand 10/25)</u></b></font>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="35" y="1480" width="510" height="40" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-195" value="" style="group" parent="1" vertex="1" connectable="0">
|
||||||
|
<mxGeometry x="90" y="1740" width="580" height="380" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-97" value="Assets" style="childLayout=tableLayout;recursiveResize=0;strokeColor=#98bf21;fillColor=#A7C942;shadow=1;" parent="WA2_J1DCvVjPXciXSW-M-195" vertex="1">
|
||||||
|
<mxGeometry x="50" y="40" width="550" height="330" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-98" style="shape=tableRow;horizontal=0;startSize=0;swimlaneHead=0;swimlaneBody=0;top=0;left=0;bottom=0;right=0;dropTarget=0;collapsible=0;recursiveResize=0;expand=0;fontStyle=0;strokeColor=inherit;fillColor=#ffffff;" parent="WA2_J1DCvVjPXciXSW-M-97" vertex="1">
|
||||||
|
<mxGeometry width="550" height="43" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-99" value="ID" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#A7C942;align=center;fontStyle=1;fontColor=#FFFFFF;html=1;" parent="WA2_J1DCvVjPXciXSW-M-98" vertex="1">
|
||||||
|
<mxGeometry width="117" height="43" as="geometry">
|
||||||
|
<mxRectangle width="117" height="43" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-100" value="Unterkategorie" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#A7C942;align=center;fontStyle=1;fontColor=#FFFFFF;html=1;" parent="WA2_J1DCvVjPXciXSW-M-98" vertex="1">
|
||||||
|
<mxGeometry x="117" width="159" height="43" as="geometry">
|
||||||
|
<mxRectangle width="159" height="43" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-101" value="Unterunterkategorie" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#A7C942;align=center;fontStyle=1;fontColor=#FFFFFF;html=1;" parent="WA2_J1DCvVjPXciXSW-M-98" vertex="1">
|
||||||
|
<mxGeometry x="276" width="137" height="43" as="geometry">
|
||||||
|
<mxRectangle width="137" height="43" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-165" value="Wort/Vokabel" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#A7C942;align=center;fontStyle=1;fontColor=#FFFFFF;html=1;" parent="WA2_J1DCvVjPXciXSW-M-98" vertex="1">
|
||||||
|
<mxGeometry x="413" width="137" height="43" as="geometry">
|
||||||
|
<mxRectangle width="137" height="43" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-102" value="" style="shape=tableRow;horizontal=0;startSize=0;swimlaneHead=0;swimlaneBody=0;top=0;left=0;bottom=0;right=0;dropTarget=0;collapsible=0;recursiveResize=0;expand=0;fontStyle=0;strokeColor=inherit;fillColor=#ffffff;" parent="WA2_J1DCvVjPXciXSW-M-97" vertex="1">
|
||||||
|
<mxGeometry y="43" width="550" height="42" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-103" value="7.1.1" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-102" vertex="1">
|
||||||
|
<mxGeometry width="117" height="42" as="geometry">
|
||||||
|
<mxRectangle width="117" height="42" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-104" value="Außenarchitektur" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-102" vertex="1">
|
||||||
|
<mxGeometry x="117" width="159" height="42" as="geometry">
|
||||||
|
<mxRectangle width="159" height="42" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-105" value="Außenarchitektur allgemein" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-102" vertex="1">
|
||||||
|
<mxGeometry x="276" width="137" height="42" as="geometry">
|
||||||
|
<mxRectangle width="137" height="42" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-166" value="Außenarchitektur allgemein" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-102" vertex="1">
|
||||||
|
<mxGeometry x="413" width="137" height="42" as="geometry">
|
||||||
|
<mxRectangle width="137" height="42" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-187" style="shape=tableRow;horizontal=0;startSize=0;swimlaneHead=0;swimlaneBody=0;top=0;left=0;bottom=0;right=0;dropTarget=0;collapsible=0;recursiveResize=0;expand=0;fontStyle=0;strokeColor=inherit;fillColor=#ffffff;" parent="WA2_J1DCvVjPXciXSW-M-97" vertex="1">
|
||||||
|
<mxGeometry y="85" width="550" height="41" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-188" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-187" vertex="1">
|
||||||
|
<mxGeometry width="117" height="41" as="geometry">
|
||||||
|
<mxRectangle width="117" height="41" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-189" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-187" vertex="1">
|
||||||
|
<mxGeometry x="117" width="159" height="41" as="geometry">
|
||||||
|
<mxRectangle width="159" height="41" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-190" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-187" vertex="1">
|
||||||
|
<mxGeometry x="276" width="137" height="41" as="geometry">
|
||||||
|
<mxRectangle width="137" height="41" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-191" value="Hof" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-187" vertex="1">
|
||||||
|
<mxGeometry x="413" width="137" height="41" as="geometry">
|
||||||
|
<mxRectangle width="137" height="41" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-106" value="" style="shape=tableRow;horizontal=0;startSize=0;swimlaneHead=0;swimlaneBody=0;top=0;left=0;bottom=0;right=0;dropTarget=0;collapsible=0;recursiveResize=0;expand=0;fontStyle=1;strokeColor=inherit;fillColor=#EAF2D3;" parent="WA2_J1DCvVjPXciXSW-M-97" vertex="1">
|
||||||
|
<mxGeometry y="126" width="550" height="41" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-107" value="7.1.2" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-106" vertex="1">
|
||||||
|
<mxGeometry width="117" height="41" as="geometry">
|
||||||
|
<mxRectangle width="117" height="41" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-108" value="Außenarchitektur" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-106" vertex="1">
|
||||||
|
<mxGeometry x="117" width="159" height="41" as="geometry">
|
||||||
|
<mxRectangle width="159" height="41" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-109" value="Gebäudetypen" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-106" vertex="1">
|
||||||
|
<mxGeometry x="276" width="137" height="41" as="geometry">
|
||||||
|
<mxRectangle width="137" height="41" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-167" value="Gebäudetypen" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-106" vertex="1">
|
||||||
|
<mxGeometry x="413" width="137" height="41" as="geometry">
|
||||||
|
<mxRectangle width="137" height="41" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-110" value="" style="shape=tableRow;horizontal=0;startSize=0;swimlaneHead=0;swimlaneBody=0;top=0;left=0;bottom=0;right=0;dropTarget=0;collapsible=0;recursiveResize=0;expand=0;fontStyle=0;strokeColor=inherit;fillColor=#ffffff;" parent="WA2_J1DCvVjPXciXSW-M-97" vertex="1">
|
||||||
|
<mxGeometry y="167" width="550" height="44" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-111" value="" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;fontStyle=0;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-110" vertex="1">
|
||||||
|
<mxGeometry width="117" height="44" as="geometry">
|
||||||
|
<mxRectangle width="117" height="44" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-112" value="" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;fontStyle=0;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-110" vertex="1">
|
||||||
|
<mxGeometry x="117" width="159" height="44" as="geometry">
|
||||||
|
<mxRectangle width="159" height="44" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-113" value="" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;fontStyle=0;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-110" vertex="1">
|
||||||
|
<mxGeometry x="276" width="137" height="44" as="geometry">
|
||||||
|
<mxRectangle width="137" height="44" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-168" value="Haus" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;fontStyle=0;align=center;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-110" vertex="1">
|
||||||
|
<mxGeometry x="413" width="137" height="44" as="geometry">
|
||||||
|
<mxRectangle width="137" height="44" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-114" value="" style="shape=tableRow;horizontal=0;startSize=0;swimlaneHead=0;swimlaneBody=0;top=0;left=0;bottom=0;right=0;dropTarget=0;collapsible=0;recursiveResize=0;expand=0;fontStyle=1;strokeColor=inherit;fillColor=#EAF2D3;" parent="WA2_J1DCvVjPXciXSW-M-97" vertex="1">
|
||||||
|
<mxGeometry y="211" width="550" height="39" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-115" value="7.2" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-114" vertex="1">
|
||||||
|
<mxGeometry width="117" height="39" as="geometry">
|
||||||
|
<mxRectangle width="117" height="39" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-116" value="Innenarchitektur" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-114" vertex="1">
|
||||||
|
<mxGeometry x="117" width="159" height="39" as="geometry">
|
||||||
|
<mxRectangle width="159" height="39" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-117" value="" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-114" vertex="1">
|
||||||
|
<mxGeometry x="276" width="137" height="39" as="geometry">
|
||||||
|
<mxRectangle width="137" height="39" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-169" value="Innenarchitektur" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-114" vertex="1">
|
||||||
|
<mxGeometry x="413" width="137" height="39" as="geometry">
|
||||||
|
<mxRectangle width="137" height="39" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-175" style="shape=tableRow;horizontal=0;startSize=0;swimlaneHead=0;swimlaneBody=0;top=0;left=0;bottom=0;right=0;dropTarget=0;collapsible=0;recursiveResize=0;expand=0;fontStyle=1;strokeColor=inherit;fillColor=#FFFFFF;" parent="WA2_J1DCvVjPXciXSW-M-97" vertex="1">
|
||||||
|
<mxGeometry y="250" width="550" height="40" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-176" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-175" vertex="1">
|
||||||
|
<mxGeometry width="117" height="40" as="geometry">
|
||||||
|
<mxRectangle width="117" height="40" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-177" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-175" vertex="1">
|
||||||
|
<mxGeometry x="117" width="159" height="40" as="geometry">
|
||||||
|
<mxRectangle width="159" height="40" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-178" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-175" vertex="1">
|
||||||
|
<mxGeometry x="276" width="137" height="40" as="geometry">
|
||||||
|
<mxRectangle width="137" height="40" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-179" value="Zimmer" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=inherit;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-175" vertex="1">
|
||||||
|
<mxGeometry x="413" width="137" height="40" as="geometry">
|
||||||
|
<mxRectangle width="137" height="40" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-170" style="shape=tableRow;horizontal=0;startSize=0;swimlaneHead=0;swimlaneBody=0;top=0;left=0;bottom=0;right=0;dropTarget=0;collapsible=0;recursiveResize=0;expand=0;fontStyle=1;strokeColor=inherit;fillColor=#EAF2D3;" parent="WA2_J1DCvVjPXciXSW-M-97" vertex="1">
|
||||||
|
<mxGeometry y="290" width="550" height="40" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-171" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-170" vertex="1">
|
||||||
|
<mxGeometry width="117" height="40" as="geometry">
|
||||||
|
<mxRectangle width="117" height="40" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-172" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-170" vertex="1">
|
||||||
|
<mxGeometry x="117" width="159" height="40" as="geometry">
|
||||||
|
<mxRectangle width="159" height="40" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-173" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-170" vertex="1">
|
||||||
|
<mxGeometry x="276" width="137" height="40" as="geometry">
|
||||||
|
<mxRectangle width="137" height="40" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-174" value="Fußboden" style="connectable=0;recursiveResize=0;strokeColor=inherit;fillColor=#E6FFCC;whiteSpace=wrap;html=1;" parent="WA2_J1DCvVjPXciXSW-M-170" vertex="1">
|
||||||
|
<mxGeometry x="413" width="137" height="40" as="geometry">
|
||||||
|
<mxRectangle width="137" height="40" as="alternateBounds" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-323" value="<b><u>b) Beispiel</u></b>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="WA2_J1DCvVjPXciXSW-M-195" vertex="1">
|
||||||
|
<mxGeometry x="-30" width="80" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-197" value="" style="endArrow=none;html=1;rounded=0;" parent="1" edge="1">
|
||||||
|
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||||
|
<mxPoint x="45" y="-110" as="sourcePoint" />
|
||||||
|
<mxPoint x="815" y="-110" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-198" value="<font style="font-size: 21px;"><b><u>2. Normvokabular-Abgleich</u></b></font>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="45" y="-70" width="290" height="40" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-199" value="" style="endArrow=none;dashed=1;html=1;dashPattern=1 3;strokeWidth=2;rounded=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;exitX=1;exitY=0.5;exitDx=0;exitDy=0;exitPerimeter=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-64" target="WA2_J1DCvVjPXciXSW-M-65" edge="1">
|
||||||
|
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||||
|
<mxPoint x="440" y="1110" as="sourcePoint" />
|
||||||
|
<mxPoint x="534" y="1110" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-202" value="Scanvorgang" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#B3FF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="70" y="-670" width="200" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-203" value="Erfassen" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#B3FF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="240" y="-400" width="200" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-207" value="Ebenenstruktur festlegen" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#B3FF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="180" y="-490" width="200" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-209" value="Erfassungstabelle" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#99CCFF;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="370" y="-200" width="247.5" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-210" value="<ul><li>Durchgehen einer Box von vorne bis hinten</li><li>Auflegen des Objekts, Ausrichtung der Farbkarte</li><li>Manuelles Festlegen des Scanbereichs</li><li>Scan der gesamten Box</li></ul>" style="text;strokeColor=none;fillColor=none;html=1;whiteSpace=wrap;verticalAlign=middle;overflow=hidden;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="255" y="-690" width="320" height="80" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-211" value="<ul><li>Durchgehen der exportierten Scans im Bildviewer</li><li>Festlegung der Scanebenen (Umschlag, Vorderseite, Rückseite, etc.)</li></ul>" style="text;strokeColor=none;fillColor=none;html=1;whiteSpace=wrap;verticalAlign=middle;overflow=hidden;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="367.5" y="-500" width="320" height="80" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-212" value="<ul><li>Durchgehen der Scans</li><li>Erfassen: Datum, Urheber, Eigner, Material</li><li>Vermessen des Objekts</li><li>Objektbeschreibung: Verschlagwortung des Bildinhalts</li><li>Erfassen etwaiger Inschriften und Anmerkungen</li></ul>" style="text;strokeColor=none;fillColor=none;html=1;whiteSpace=wrap;verticalAlign=middle;overflow=hidden;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="438" y="-440" width="300" height="140" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-216" value="<font style="font-size: 21px;"><b><u>1. Ablauf des Scan- und Erfassungsprozesses</u></b></font>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="35" y="-840" width="490" height="40" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-217" value="Vorbereitung" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#B3FF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="30" y="-760" width="200" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-218" value="<ul><li>PC hochfahren</li><li>Scanner starten/Kamera und Beleuchtung vorbereiten, Farbkarte platzieren</li><li>Software starten, Scanauftrag wählen</li><li>Erfassungstabelle öffnen</li><li>Passende Box wählen</li></ul>" style="text;strokeColor=none;fillColor=none;html=1;whiteSpace=wrap;verticalAlign=middle;overflow=hidden;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="212.5" y="-790" width="555" height="110" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-236" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.25;exitY=1;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-226" target="WA2_J1DCvVjPXciXSW-M-228" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-318" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-226" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="350" y="1573" as="targetPoint" />
|
||||||
|
<Array as="points">
|
||||||
|
<mxPoint x="340" y="1573" />
|
||||||
|
<mxPoint x="360" y="1573" />
|
||||||
|
</Array>
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-226" value="Kategorie" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="170" y="1562.5" width="150" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-237" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.25;exitY=1;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-228" target="WA2_J1DCvVjPXciXSW-M-229" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-228" value="Unterkategorie" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="290" y="1605" width="150" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-238" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.232;exitY=1.005;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;exitPerimeter=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-229" target="WA2_J1DCvVjPXciXSW-M-230" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="407.5" y="1652.5" as="sourcePoint" />
|
||||||
|
<mxPoint x="440" y="1687.5" as="targetPoint" />
|
||||||
|
<Array as="points">
|
||||||
|
<mxPoint x="440" y="1700" />
|
||||||
|
</Array>
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-320" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-229" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="590" y="1660" as="targetPoint" />
|
||||||
|
<Array as="points">
|
||||||
|
<mxPoint x="580" y="1660" />
|
||||||
|
<mxPoint x="580" y="1660" />
|
||||||
|
</Array>
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-229" value="Unterunterkategorie" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="405" y="1650" width="150" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-230" value="Wort/Vokabel" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="513" y="1690" width="150" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-313" value="<font style="font-size: 10px;">1</font>" style="ellipse;whiteSpace=wrap;html=1;rounded=0;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="352.5" y="1560" width="25" height="25" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-315" value="<font style="font-size: 10px;">1.1</font>" style="ellipse;whiteSpace=wrap;html=1;rounded=0;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="475" y="1602.5" width="25" height="25" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-316" value="<font style="font-size: 10px;">1.1.1</font>" style="ellipse;whiteSpace=wrap;html=1;rounded=0;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="592.5" y="1647.5" width="25" height="25" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-319" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" parent="1" source="WA2_J1DCvVjPXciXSW-M-228" edge="1">
|
||||||
|
<mxGeometry relative="1" as="geometry">
|
||||||
|
<mxPoint x="470" y="1615" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-321" value="" style="endArrow=none;dashed=1;html=1;dashPattern=1 3;strokeWidth=2;rounded=0;" parent="1" edge="1">
|
||||||
|
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||||
|
<mxPoint x="50" y="1740" as="sourcePoint" />
|
||||||
|
<mxPoint x="800" y="1740" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-322" value="<b><u>a) Hierarchie und ID-Struktur</u></b>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="40" y="1530" width="190" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="WA2_J1DCvVjPXciXSW-M-193" value="Blatt 7 - Architektur" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="140" y="2110" width="165" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="B-3lv8s0GtbLfT8x5DVe-1" value="Scan exportieren" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#B3FF66;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="122.5" y="-580" width="200" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="B-3lv8s0GtbLfT8x5DVe-2" value="<ul><li>Export der gesamten Scans einer Box in einen Ordner</li><li>Reihenfolge der Scans checken</li></ul>" style="text;strokeColor=none;fillColor=none;html=1;whiteSpace=wrap;verticalAlign=middle;overflow=hidden;" parent="1" vertex="1">
|
||||||
|
<mxGeometry x="307.5" y="-590" width="320" height="80" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-1" value="Erfassung prüfen" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#B3FF66;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="310" y="-300" width="200" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-2" value="<ul><li>Durchgehen der Scans, Vergleich der Nummern mit dem Inhalt der Erfassung</li><li>Makro laufen lassen: Prüft Begriffe unter "Objektbschreibung" auf Treffer im Normvokabular (siehe Anleitung)</li></ul>" style="text;strokeColor=none;fillColor=none;html=1;whiteSpace=wrap;verticalAlign=middle;overflow=hidden;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="490" y="-310" width="320" height="90" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-3" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.3;entryY=0;entryDx=0;entryDy=0;entryPerimeter=0;" edge="1" parent="1" source="WA2_J1DCvVjPXciXSW-M-217" target="WA2_J1DCvVjPXciXSW-M-202">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-4" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.238;entryY=0;entryDx=0;entryDy=0;entryPerimeter=0;" edge="1" parent="1" source="WA2_J1DCvVjPXciXSW-M-202" target="B-3lv8s0GtbLfT8x5DVe-1">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.213;entryY=0;entryDx=0;entryDy=0;entryPerimeter=0;" edge="1" parent="1" source="B-3lv8s0GtbLfT8x5DVe-1" target="WA2_J1DCvVjPXciXSW-M-207">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-7" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.2;entryY=0;entryDx=0;entryDy=0;entryPerimeter=0;" edge="1" parent="1" source="WA2_J1DCvVjPXciXSW-M-207" target="WA2_J1DCvVjPXciXSW-M-203">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-8" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.148;entryY=-0.056;entryDx=0;entryDy=0;entryPerimeter=0;" edge="1" parent="1" source="WA2_J1DCvVjPXciXSW-M-203" target="ey7EfLCcf-ExpX1qzLUj-1">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-10" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.161;entryY=-0.039;entryDx=0;entryDy=0;entryPerimeter=0;" edge="1" parent="1" source="ey7EfLCcf-ExpX1qzLUj-1" target="WA2_J1DCvVjPXciXSW-M-209">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-11" value="<font style="font-size: 15px;"><b>Stand: 14.10.25</b></font>" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="745" y="-1090" width="105" height="50" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-12" value="" style="endArrow=none;dashed=1;html=1;dashPattern=1 3;strokeWidth=2;rounded=0;" edge="1" parent="1">
|
||||||
|
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||||
|
<mxPoint x="470" y="-880" as="sourcePoint" />
|
||||||
|
<mxPoint x="520" y="-880" as="targetPoint" />
|
||||||
|
</mxGeometry>
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ey7EfLCcf-ExpX1qzLUj-13" value="<b>Optional/Optimierungsmöglichkeit</b>" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="530" y="-896" width="220" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
</root>
|
||||||
|
</mxGraphModel>
|
||||||
|
</diagram>
|
||||||
|
</mxfile>
|
||||||
@ -0,0 +1,97 @@
= ExLibris Scanning and Cataloguing – Guide =

The digitisation of exlibris objects (bookplates) is divided into two main steps: '''scanning''' and '''cataloguing''' the objects.

Each object is part of a box, and each box is part of an octavo (°) group.

A box is scanned completely before cataloguing begins – this avoids errors that can occur when, for example, the order of the scans changes because scans are added or deleted afterwards.

== 1. Preparation ==

=== a) Preparing the hardware ===
* Start the PC
* Switch on the lights
* Remove the camera cover
* Switch on the camera

=== b) Preparing the software ===
* Start MultiDotScan by Walter Nagel
* Select the scan job → '''WELCHEN SCANAUFTRAG'''
== 2. Scanning ==
* Place the object on the pad, square to the image frame shown in the software
* Position the colour card next to it – to the right or below, depending on the object's format
* Adjust the image frame to the object (margin of roughly 10–20 mm)
* Trigger the camera with the foot pedal → '''Scan'''
* If a scan is missing or has to be redone: drag it to the correct position in the software → on export the scans are automatically arranged in the correct order
== 3. Exporting the scans ==
* After the entire box has been scanned, export the scan job → it is written to '''DATEIPFAD'''
== 4. Cataloguing ==

* Open the exported scans under '''DATEIPFAD'''
(in the image viewer, '''not''' in the scanning software – otherwise the order of the scans can change, which leads to errors in the cataloguing table, the Erfassungstabelle)
* Each box (= each scan job) starts counting at 1; every scan is numbered consecutively
* Enter the scan numbers in the cataloguing table, taking the layer structure into account:

=== Layer structure ===
* Default: front side → layer 0, back side → layer 1
* If the exlibris has a cover, or several exlibris lie in one envelope:
* Envelope = layer 0
* Front side = layer 1
* Back side = layer 2
* Back of the envelope = layer 3
→ This makes it easy to see where an envelope begins and ends; a small worked example follows below.
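A minimal illustration of this numbering for an envelope that contains a single exlibris – the scan numbers are made up:

<pre>
Scan 17 → envelope, front      → layer 0
Scan 18 → exlibris, front side → layer 1
Scan 19 → exlibris, back side  → layer 2
Scan 20 → envelope, back       → layer 3
</pre>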
=== Capturing the metadata ===
* Jahr (year) – is there a year on the front or back?
* Urheber (creator) – the artist
* Eigner (owner) – who did the exlibris belong to?
* Objektbeschreibung (object description) – what can be seen? Keywording of the image content

==== Please note ====
* Describe from '''general → specific'''
* Example: "Baum" (tree) instead of "Schwarzeiche" (black oak)
* "Helm" (helmet) instead of "Topfhelm 15. Jahrhundert" (15th-century great helm)
* Prefer '''singular forms''' – even when several objects are shown
* e.g. "Buch" instead of "Bücher", "Figur, weiblich" instead of "Frauengruppe"
* State '''activities in the infinitive''': "sitzen", "lesen", "fahren" instead of "sitzt", "lesend", "fährt"
* '''Avoid connecting words''' ("stopwords"):
<nowiki>mit, ohne, der, die, das, ein, eine, und, zu, von, im, in, auf, an, als, bei, für, aus, dem, den, des, eines, einer</nowiki>
(the mapper macro filters these out anyway – see the sketch after this list)

* Material – usually paper
* Maße (dimensions) – height × width in cm (append ",0" to whole centimetre values, e.g. 14,3 × 7,0 cm instead of 14,3 × 7)
* Objekttyp (object type) – Exlibris, Rückseite, Umschlag, Zettel
* Inschrift (inscription) – e.g. coat of arms with a banderole
* Anmerkungen (remarks) – other notes or observations (pencil entries etc.)
* AUX – not relevant
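For orientation, a minimal sketch of the pre-processing the mapper macro applies to an "Objektbeschreibung" cell before the vocabulary lookup. The stopword list and the normalisation mirror the macro code shown further down on this page; the example term and the helper name extract_terms are only illustrative:

<pre>
import re

STOPWORDS = {"mit", "ohne", "der", "die", "das", "ein", "eine", "und", "zu", "von", "im", "in", "auf",
             "an", "als", "bei", "für", "aus", "dem", "den", "des", "eines", "einer"}

def normalize_text(s):
    # lower-case, strip punctuation and collapse whitespace (as the macro does)
    s = str(s).strip().lower()
    s = re.sub(r"[\(\)\[\]\"'\\;:\?!,\.]", "", s)
    return re.sub(r"\s+", " ", s)

def extract_terms(objektbeschreibung):
    # split the cell into single words, drop stopwords and bare numbers
    words = [w for w in re.split(r"\s+", normalize_text(objektbeschreibung)) if w]
    return [w for w in words if w not in STOPWORDS and not w.isdigit()]

print(extract_terms("Wappen mit Helm und Buch"))   # -> ['wappen', 'helm', 'buch']
</pre>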
== 5. Checking the catalogue entries ==
* Does the numbering of the scans match the corresponding row in the cataloguing table?
* Run the macro over the table:
There is a macro that compares the "Objektbeschreibung" column against the internal controlled vocabulary (Normvokabular) and thereby keeps the keywording consistent.

The macro can be started directly from the LibreOffice Calc menu:

<pre>
Extras → Makros → Makros verwalten → Python →
Meine Makros → Vokabular_Abgleich_Makro → mapper_macro_2.x → run_mapper_macro → Ausführen
</pre>

'''Note:'''
Detailed instructions for using the macro are available under '''DATEIPFAD'''.
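Per term, the macro roughly decides between a hit, a suggestion and no hit. The sketch below is a simplified, stand-alone illustration of that decision (the real macro additionally lemmatises terms and can use RapidFuzz; the tiny vocabulary in the usage comment is made up):

<pre>
from difflib import SequenceMatcher

def check_term(term, norm_dict, threshold=0.75):
    """norm_dict maps normalised vocabulary words to entries with name and ID."""
    key = term.strip().lower()
    if key in norm_dict:                      # exact hit -> cell is coloured green
        return "Treffer", [norm_dict[key]]
    # otherwise collect sufficiently similar vocabulary entries as suggestions -> yellow
    suggestions = [e for k, e in norm_dict.items()
                   if SequenceMatcher(None, key, k).ratio() >= threshold]
    if suggestions:
        return "Vorschlag", suggestions
    return "KEIN TREFFER", []                 # nothing found -> cell is coloured red

# check_term("Helm", {"helm": {"Name": "Helm", "ID": "3.2"}})
# -> ("Treffer", [{"Name": "Helm", "ID": "3.2"}])
</pre>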
== 6. Wrapping up ==
* Fill in the pre-printed form:
* Name
* Date
* Which box
* How far scanning/cataloguing got
* Anything to be aware of?
== 7. Best practices ==
* Better to scan too much than too little (back sides, envelopes, etc.)
* Better to describe too much than too little (anything that can be identified with confidence may be keyworded)
* Record notes or inscriptions on exlibris or their back sides in full
* When in doubt: ask
2815369
api_cache.json
File diff suppressed because it is too large
Load Diff
@ -1,9 +0,0 @@
{
  "normvokabular_path": "/home/jarnold/projects/GND-Skript Test/Input CSV/Normvokabular_INTERN/NV_MASTER.ods",
  "max_suggestions": 3,
  "color_hit": "#C6EFCE",
  "color_miss": "#FFC7CE",
  "use_rapidfuzz": false,
  "use_spacy": false,
  "autosave": false
}
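A minimal sketch, assuming a hypothetical helper named load_config, of how the configuration above could be read with fallbacks to the same default values:

import json

def load_config(path):
    # merge the JSON config over the defaults; keep the defaults if the file is missing or unreadable
    defaults = {"max_suggestions": 3, "color_hit": "#C6EFCE", "color_miss": "#FFC7CE",
                "use_rapidfuzz": False, "use_spacy": False, "autosave": False}
    try:
        with open(path, "r", encoding="utf-8") as f:
            defaults.update(json.load(f))
    except (OSError, ValueError):
        pass
    return defaults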
371
mapper.py
@ -1,371 +0,0 @@
|
|||||||
import os
|
|
||||||
import sys
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
import json
|
|
||||||
import pandas as pd
|
|
||||||
import requests
|
|
||||||
from pathlib import Path
|
|
||||||
from collections import defaultdict
|
|
||||||
from difflib import SequenceMatcher
|
|
||||||
|
|
||||||
# RapidFuzz für Token-basierte Fuzzy-Suche
|
|
||||||
try:
|
|
||||||
from rapidfuzz import fuzz
|
|
||||||
RAPIDFUZZ_AVAILABLE = True
|
|
||||||
print("RapidFuzz verfügbar")
|
|
||||||
except ImportError:
|
|
||||||
RAPIDFUZZ_AVAILABLE = False
|
|
||||||
print("RapidFuzz nicht verfügbar – nutze SequenceMatcher")
|
|
||||||
|
|
||||||
# Spacy Lemmatizer
|
|
||||||
try:
|
|
||||||
import spacy
|
|
||||||
nlp = spacy.load("de_core_news_sm")
|
|
||||||
SPACY_AVAILABLE = True
|
|
||||||
print("Spacy Lemmatizer aktiviert")
|
|
||||||
except:
|
|
||||||
SPACY_AVAILABLE = False
|
|
||||||
nlp = None
|
|
||||||
print("Spacy nicht verfügbar – nutze naive Stemmer")
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Pfade & Config
|
|
||||||
# =========================
|
|
||||||
INPUT_DIR = Path("Input CSV")
|
|
||||||
OUTPUT_DIR = Path("Auswertung Ergebnisse")
|
|
||||||
OUTPUT_DIR.mkdir(exist_ok=True)
|
|
||||||
NORMVOC_FILE = Path("Input CSV/Normvokabular_INTERN/NV_MASTER.ods")
|
|
||||||
CACHE_FILE = "api_cache.json"
|
|
||||||
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
|
|
||||||
CONF_THRESHOLD = 0.75
|
|
||||||
TIMEOUT = 5
|
|
||||||
MAX_RETRIES = 3
|
|
||||||
BACKOFF_FACTOR = 2
|
|
||||||
HEADERS = {"User-Agent": "VLG_API_multi/1.0 (projekt@example.com)"}
|
|
||||||
API_ACTIVE = {"gnd": True, "wikidata": True}
|
|
||||||
FAIL_COUNTER = {"gnd": 0, "wikidata": 0}
|
|
||||||
|
|
||||||
# Cache
|
|
||||||
if os.path.exists(CACHE_FILE):
|
|
||||||
with open(CACHE_FILE,"r",encoding="utf-8") as f:
|
|
||||||
CACHE = json.load(f)
|
|
||||||
else:
|
|
||||||
CACHE = {}
|
|
||||||
|
|
||||||
def save_cache():
|
|
||||||
with open(CACHE_FILE,"w",encoding="utf-8") as f:
|
|
||||||
json.dump(CACHE, f, indent=2, ensure_ascii=False)
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Normalisierung / Lemma
|
|
||||||
# =========================
|
|
||||||
def normalize_text(s):
|
|
||||||
if not s:
|
|
||||||
return ""
|
|
||||||
s = str(s).lower().strip()
|
|
||||||
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
|
|
||||||
s = re.sub(r"\s+"," ",s)
|
|
||||||
return s
|
|
||||||
|
|
||||||
# Lemma-Cache
|
|
||||||
lemma_cache = {}
|
|
||||||
|
|
||||||
def lemmatize_term(term):
|
|
||||||
term_norm = normalize_text(term)
|
|
||||||
if term_norm in lemma_cache:
|
|
||||||
return lemma_cache[term_norm]
|
|
||||||
if SPACY_AVAILABLE and nlp:
|
|
||||||
doc = nlp(term_norm)
|
|
||||||
lemma = " ".join([token.lemma_ for token in doc])
|
|
||||||
else:
|
|
||||||
lemma = term_norm
|
|
||||||
lemma_cache[term_norm] = lemma
|
|
||||||
return lemma
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Kompositum-Zerlegung (einfacher Ansatz)
|
|
||||||
# =========================
|
|
||||||
def compound_split(term):
|
|
||||||
parts = re.findall(r'[A-ZÄÖÜ][a-zäöü]+', term)
|
|
||||||
return parts if parts else [term]
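# Example behaviour (illustrative):
#   compound_split("WappenSchild") -> ["Wappen", "Schild"]   (splits only on internal capitals)
#   compound_split("Wappenschild") -> ["Wappenschild"]       (ordinary compounds stay whole)
#   compound_split("helm")         -> ["helm"]                (no match -> the whole term is returned)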
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Normvokabular laden & Lemma vorbereiten
|
|
||||||
# =========================
|
|
||||||
def load_normvokabular(file_path):
|
|
||||||
sheets = pd.read_excel(file_path, sheet_name=None, engine="odf" if file_path.suffix.lower()==".ods" else None)
|
|
||||||
norm_dict = {}
|
|
||||||
stem_index = defaultdict(list)
|
|
||||||
lemma_norm_map = {} # für RapidFuzz preprocessed
|
|
||||||
|
|
||||||
for sheet_name, df in sheets.items():
|
|
||||||
if sheet_name.lower() in ["master", "übersicht"]:
|
|
||||||
continue
|
|
||||||
df = df.dropna(how="all", axis=1)
|
|
||||||
df.columns = [str(c).strip() for c in df.columns]
|
|
||||||
id_col = next((c for c in df.columns if "ID" in c), None)
|
|
||||||
word_col = next((c for c in df.columns if "Wort" in c or "Vokabel" in c), None)
|
|
||||||
if not id_col or not word_col:
|
|
||||||
continue
|
|
||||||
|
|
||||||
current_parent_id = None
|
|
||||||
for _, row in df.iterrows():
|
|
||||||
row_id = str(row[id_col]).strip() if pd.notna(row[id_col]) else None
|
|
||||||
row_word = str(row[word_col]).strip() if pd.notna(row[word_col]) else None
|
|
||||||
if row_id:
|
|
||||||
current_parent_id = row_id
|
|
||||||
if not row_word:
|
|
||||||
continue
|
|
||||||
assigned_parent_id = current_parent_id
|
|
||||||
entry = {
|
|
||||||
"Name": row_word,
|
|
||||||
"ID": assigned_parent_id, # Parent-ID
|
|
||||||
"Sheet": sheet_name,
|
|
||||||
"Own_ID": row_id or "" # eigene ID, falls vorhanden
|
|
||||||
}
|
|
||||||
key = normalize_text(row_word)
|
|
||||||
norm_dict[key] = entry
|
|
||||||
lemma = lemmatize_term(key)
|
|
||||||
stem_index[lemma].append(entry)
|
|
||||||
if lemma not in lemma_norm_map:
|
|
||||||
lemma_norm_map[lemma] = entry
|
|
||||||
return norm_dict, stem_index, lemma_norm_map
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Mapping & Vorschläge
|
|
||||||
# =========================
|
|
||||||
def map_to_norm(term, norm_dict, stem_index, lemma_norm_map, top_n=3):
|
|
||||||
term_norm = normalize_text(term)
|
|
||||||
term_lemma = lemmatize_term(term)
|
|
||||||
|
|
||||||
# Exakter Treffer
|
|
||||||
if term_norm in norm_dict:
|
|
||||||
e = norm_dict[term_norm]
|
|
||||||
return e["Name"], e["ID"], []
|
|
||||||
|
|
||||||
# Lemma-Treffer
|
|
||||||
if term_lemma in stem_index:
|
|
||||||
e = stem_index[term_lemma][0]
|
|
||||||
return e["Name"], e["ID"], []
|
|
||||||
|
|
||||||
# KEIN TREFFER → Kompositum-Split
|
|
||||||
tokens = compound_split(term)
|
|
||||||
if len(tokens) == 1:
|
|
||||||
suggestions = get_suggestions(term_lemma, lemma_norm_map, top_n)
|
|
||||||
return "KEIN TREFFER", "", suggestions
|
|
||||||
else:
|
|
||||||
token_matches = []
|
|
||||||
for t in tokens:
|
|
||||||
t_lemma = lemmatize_term(t)
|
|
||||||
if t_lemma in stem_index:
|
|
||||||
e = stem_index[t_lemma][0]
|
|
||||||
token_matches.append((t, e["Name"], e["ID"]))
|
|
||||||
else:
|
|
||||||
sugg = get_suggestions(t_lemma, lemma_norm_map, top_n)
|
|
||||||
token_matches.append((t, "KEIN TREFFER", "", sugg))
|
|
||||||
combined_suggestions = [f"{m[1]} ({m[2]})" for m in token_matches if m[1] != "KEIN TREFFER"]
|
|
||||||
return "KEIN TREFFER", "", combined_suggestions
|
|
||||||
|
|
||||||
def get_suggestions(term_lemma, lemma_norm_map, top_n=3, threshold=CONF_THRESHOLD):
|
|
||||||
candidates = []
|
|
||||||
for key_lemma, entry in lemma_norm_map.items():
|
|
||||||
if RAPIDFUZZ_AVAILABLE:
|
|
||||||
score = fuzz.token_set_ratio(term_lemma, key_lemma)/100
|
|
||||||
else:
|
|
||||||
score = SequenceMatcher(None, term_lemma.lower(), key_lemma.lower()).ratio()
|
|
||||||
if key_lemma.lower().startswith(term_lemma.lower()):
|
|
||||||
score = min(score + 0.1, 1.0)
|
|
||||||
if score >= threshold:
|
|
||||||
candidates.append((score, entry["Name"], entry["ID"]))
|
|
||||||
candidates.sort(reverse=True)
|
|
||||||
return [f"{name} ({id_})" for _, name, id_ in candidates[:top_n]]
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# API-Abfragen
|
|
||||||
# =========================
|
|
||||||
def request_with_retries(api_name,url,params=None):
|
|
||||||
cache_key = url + str(params)
|
|
||||||
if cache_key in CACHE:
|
|
||||||
return CACHE[cache_key]
|
|
||||||
retries = 0
|
|
||||||
while retries < MAX_RETRIES:
|
|
||||||
try:
|
|
||||||
r = requests.get(url, params=params, timeout=TIMEOUT, headers=HEADERS)
|
|
||||||
if r.status_code == 200:
|
|
||||||
try: data = r.json()
|
|
||||||
except: data = r.text
|
|
||||||
CACHE[cache_key] = data
|
|
||||||
FAIL_COUNTER[api_name] = 0
|
|
||||||
return data
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
retries += 1
|
|
||||||
time.sleep(min(BACKOFF_FACTOR**retries,30))
|
|
||||||
FAIL_COUNTER[api_name] += 1
|
|
||||||
if FAIL_COUNTER[api_name] >= 10:
|
|
||||||
API_ACTIVE[api_name] = False
|
|
||||||
return None
|
|
||||||
|
|
||||||
def batch_query_gnd(terms):
|
|
||||||
results={}
|
|
||||||
if not API_ACTIVE.get("gnd", False):
|
|
||||||
for t in terms: results[t] = ""
|
|
||||||
return results
|
|
||||||
for t in terms:
|
|
||||||
url="https://lobid.org/gnd/search"
|
|
||||||
params={"q":t,"format":"json"}
|
|
||||||
data = request_with_retries("gnd", url, params)
|
|
||||||
top = ""
|
|
||||||
if data and "member" in data:
|
|
||||||
cands = [(doc.get("preferredName","") or doc.get("name",""), SequenceMatcher(None,t.lower(),(doc.get("preferredName","") or doc.get("name","")).lower()).ratio()) for doc in data["member"] if doc.get("preferredName","") or doc.get("name","")]
|
|
||||||
cands = [c for c in cands if c[1]>=0.75]
|
|
||||||
if cands:
|
|
||||||
top = sorted(cands,key=lambda x:x[1],reverse=True)[0][0]
|
|
||||||
results[t] = top
|
|
||||||
return results
|
|
||||||
|
|
||||||
def batch_query_wikidata(terms):
|
|
||||||
results={}
|
|
||||||
if not API_ACTIVE.get("wikidata", False):
|
|
||||||
for t in terms: results[t] = ""
|
|
||||||
return results
|
|
||||||
for t in terms:
|
|
||||||
url="https://www.wikidata.org/w/api.php"
|
|
||||||
params={"action":"wbsearchentities","search":t,"language":"de","format":"json"}
|
|
||||||
data = request_with_retries("wikidata", url, params)
|
|
||||||
top = ""
|
|
||||||
if data and "search" in data:
|
|
||||||
cands = [(e.get("label",""), SequenceMatcher(None,t.lower(),e.get("label","").lower()).ratio()) for e in data["search"] if e.get("label","")]
|
|
||||||
cands = [c for c in cands if c[1]>=0.70]
|
|
||||||
if cands:
|
|
||||||
top = sorted(cands,key=lambda x:x[1],reverse=True)[0][0]
|
|
||||||
results[t] = top
|
|
||||||
return results
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Markierung / Export
|
|
||||||
# =========================
|
|
||||||
def mark_norm_hits(file_path):
|
|
||||||
ext = file_path.suffix.lower()
|
|
||||||
if ext in [".xlsx", ".xls"]:
|
|
||||||
from openpyxl import load_workbook
|
|
||||||
from openpyxl.styles import PatternFill
|
|
||||||
wb = load_workbook(file_path)
|
|
||||||
ws = wb.active
|
|
||||||
green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
|
|
||||||
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
|
|
||||||
col_map = {cell.value: idx+1 for idx, cell in enumerate(ws[1])}
|
|
||||||
norm_col = col_map.get("Norm_Treffer", None)
|
|
||||||
if not norm_col:
|
|
||||||
print("Spalte 'Norm_Treffer' nicht gefunden")
|
|
||||||
return
|
|
||||||
for row in ws.iter_rows(min_row=2, min_col=norm_col, max_col=norm_col):
|
|
||||||
cell = row[0]
|
|
||||||
if cell.value and cell.value != "KEIN TREFFER":
|
|
||||||
cell.fill = green_fill
|
|
||||||
else:
|
|
||||||
cell.fill = red_fill
|
|
||||||
wb.save(file_path)
|
|
||||||
elif ext==".ods":
|
|
||||||
df = pd.read_excel(file_path, engine="odf")
|
|
||||||
df["Norm_Status"] = df["Norm_Treffer"].apply(lambda x: "Treffer" if pd.notna(x) and str(x).strip() and x!="KEIN TREFFER" else "Kein Treffer")
|
|
||||||
df.to_excel(file_path, index=False, engine="odf")
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Verarbeitung Input-Dateien
|
|
||||||
# =========================
|
|
||||||
def process_files():
|
|
||||||
norm_dict, stem_index, lemma_norm_map = load_normvokabular(NORMVOC_FILE)
|
|
||||||
total_terms = 0
|
|
||||||
total_hits = 0
|
|
||||||
|
|
||||||
if not INPUT_DIR.exists():
|
|
||||||
print(f"Eingabeordner {INPUT_DIR} fehlt")
|
|
||||||
sys.exit(1)
|
|
||||||
files = list(INPUT_DIR.glob("*"))
|
|
||||||
if not files:
|
|
||||||
print("Keine Dateien gefunden")
|
|
||||||
return
|
|
||||||
|
|
||||||
for file_path in files:
|
|
||||||
if not file_path.suffix.lower() in [".csv",".ods",".xls",".xlsx"]:
|
|
||||||
continue
|
|
||||||
print(f"Verarbeite Datei: {file_path.name}")
|
|
||||||
try:
|
|
||||||
if file_path.suffix.lower() == ".csv":
|
|
||||||
df = pd.read_csv(file_path)
|
|
||||||
else:
|
|
||||||
df = pd.read_excel(file_path, engine="odf" if file_path.suffix.lower()==".ods" else None)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Fehler beim Lesen von {file_path.name}: {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
df = df.dropna(how="all")
|
|
||||||
df.columns = [str(c).strip() for c in df.columns]
|
|
||||||
|
|
||||||
besch_col = next((c for c in df.columns if "Objektbeschreibung" in c), None)
|
|
||||||
box_col = next((c for c in df.columns if "Objekt/Ebene" in c), None)
|
|
||||||
urh_col = next((c for c in df.columns if "Urheber" in c), None)
|
|
||||||
if not besch_col: continue
|
|
||||||
|
|
||||||
row_terms_map = []
|
|
||||||
for _, row in df.iterrows():
|
|
||||||
besch = str(row[besch_col]).strip() if pd.notna(row[besch_col]) else ""
|
|
||||||
if not besch: continue
|
|
||||||
obj_box = row[box_col] if box_col else ""
|
|
||||||
urheber = row[urh_col] if urh_col else ""
|
|
||||||
clauses = [c.strip() for c in re.split(r",", besch) if c.strip()]
|
|
||||||
terms = []
|
|
||||||
for clause in clauses:
|
|
||||||
parts = [p.strip() for p in re.split(r"\s+", clause) if p.strip()]
|
|
||||||
for p in parts:
|
|
||||||
if p.lower() in STOPWORDS: continue
|
|
||||||
if re.fullmatch(r"\d+", p): continue
|
|
||||||
terms.append(p)
|
|
||||||
row_terms_map.append((obj_box, urheber, terms))
|
|
||||||
|
|
||||||
all_terms = list({t for _,_,terms in row_terms_map for t in terms})
|
|
||||||
gnd_results = batch_query_gnd(all_terms)
|
|
||||||
wd_results = batch_query_wikidata(all_terms)
|
|
||||||
|
|
||||||
output_rows = []
|
|
||||||
for obj_box, urheber, terms in row_terms_map:
|
|
||||||
for term in terms:
|
|
||||||
norm_name, norm_id, suggestions = map_to_norm(term, norm_dict, stem_index, lemma_norm_map)
|
|
||||||
total_terms += 1
|
|
||||||
if norm_name != "KEIN TREFFER":
|
|
||||||
total_hits += 1
|
|
||||||
out_row = {
|
|
||||||
"Box": obj_box,
|
|
||||||
"Objekt/Ebene": obj_box,
|
|
||||||
"Urheber": urheber,
|
|
||||||
"Begriff": term,
|
|
||||||
"Norm_Treffer": norm_name,
|
|
||||||
"Norm_ID": norm_id,
|
|
||||||
"Norm_Vorschlag": ", ".join(suggestions) if suggestions else "",
|
|
||||||
"GND_Top1": gnd_results.get(term,""),
|
|
||||||
"WD_Top1": wd_results.get(term,"")
|
|
||||||
}
|
|
||||||
output_rows.append(out_row)
|
|
||||||
|
|
||||||
out_df = pd.DataFrame(output_rows)
|
|
||||||
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}{file_path.suffix}"
|
|
||||||
version = 1
|
|
||||||
while output_file.exists():
|
|
||||||
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}_({version}){file_path.suffix}"
|
|
||||||
version += 1
|
|
||||||
engine = "odf" if output_file.suffix.lower()==".ods" else None
|
|
||||||
out_df.to_excel(output_file, index=False, engine=engine)
|
|
||||||
mark_norm_hits(output_file)
|
|
||||||
print(f"Auswertung gespeichert: {output_file}")
|
|
||||||
|
|
||||||
save_cache()
|
|
||||||
print(f"Gesamt: {total_terms} Begriffe, {total_hits} Treffer im Normvokabular")
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Main
|
|
||||||
# =========================
|
|
||||||
if __name__ == "__main__":
|
|
||||||
process_files()
|
|
||||||
print("Fertig")
|
|
||||||
237
mapper_macro.py
@ -1,237 +0,0 @@
|
|||||||
import uno
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import traceback
|
|
||||||
import json
|
|
||||||
|
|
||||||
# Optional für Lemmatizer
|
|
||||||
try:
|
|
||||||
import spacy
|
|
||||||
nlp = spacy.load("de_core_news_sm")
|
|
||||||
SPACY_AVAILABLE = True
|
|
||||||
except:
|
|
||||||
SPACY_AVAILABLE = False
|
|
||||||
nlp = None
|
|
||||||
|
|
||||||
# Optional für Fuzzy Matching
|
|
||||||
try:
|
|
||||||
from rapidfuzz import fuzz
|
|
||||||
RAPIDFUZZ_AVAILABLE = True
|
|
||||||
except:
|
|
||||||
from difflib import SequenceMatcher
|
|
||||||
RAPIDFUZZ_AVAILABLE = False
|
|
||||||
|
|
||||||
import odf.opendocument
|
|
||||||
import odf.table
|
|
||||||
import odf.text
|
|
||||||
|
|
||||||
# ------------------------
|
|
||||||
# Konfiguration absolute Pfade
|
|
||||||
# ------------------------
|
|
||||||
BASE_DIR = "/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro"
|
|
||||||
NV_MASTER_PATH = os.path.join(BASE_DIR, "NV_MASTER.ods")
|
|
||||||
LOG_FILE = os.path.join(BASE_DIR, "mapper_macro.log")
|
|
||||||
CACHE_FILE = os.path.join(BASE_DIR, "mapper_cache.json")
|
|
||||||
|
|
||||||
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
|
|
||||||
CONF_THRESHOLD = 0.75
|
|
||||||
|
|
||||||
# ------------------------
|
|
||||||
# Logging
|
|
||||||
# ------------------------
|
|
||||||
def log(msg):
|
|
||||||
with open(LOG_FILE, "a", encoding="utf-8") as f:
|
|
||||||
f.write(msg + "\n")
|
|
||||||
|
|
||||||
# ------------------------
|
|
||||||
# Cache laden
|
|
||||||
# ------------------------
|
|
||||||
if os.path.exists(CACHE_FILE):
|
|
||||||
with open(CACHE_FILE, "r", encoding="utf-8") as f:
|
|
||||||
CACHE = json.load(f)
|
|
||||||
else:
|
|
||||||
CACHE = {}
|
|
||||||
|
|
||||||
# ------------------------
|
|
||||||
# Normalisierung / Lemma
|
|
||||||
# ------------------------
|
|
||||||
def normalize_text(s):
|
|
||||||
if not s:
|
|
||||||
return ""
|
|
||||||
s = str(s).lower().strip()
|
|
||||||
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
|
|
||||||
s = re.sub(r"\s+"," ",s)
|
|
||||||
return s
|
|
||||||
|
|
||||||
lemma_cache = {}
|
|
||||||
def lemmatize_term(term):
|
|
||||||
term_norm = normalize_text(term)
|
|
||||||
if term_norm in lemma_cache:
|
|
||||||
return lemma_cache[term_norm]
|
|
||||||
if SPACY_AVAILABLE and nlp:
|
|
||||||
doc = nlp(term_norm)
|
|
||||||
lemma = " ".join([token.lemma_ for token in doc])
|
|
||||||
else:
|
|
||||||
lemma = term_norm
|
|
||||||
lemma_cache[term_norm] = lemma
|
|
||||||
return lemma
|
|
||||||
|
|
||||||
# ------------------------
|
|
||||||
# NV_MASTER einlesen
|
|
||||||
# ------------------------
|
|
||||||
def load_nv_master(path):
|
|
||||||
norm_dict = {}
|
|
||||||
try:
|
|
||||||
doc = odf.opendocument.load(path)
|
|
||||||
except Exception as e:
|
|
||||||
log(f"Fehler beim Laden von NV_MASTER: {e}")
|
|
||||||
return norm_dict
|
|
||||||
|
|
||||||
for sheet in doc.spreadsheet.getElementsByType(odf.table.Table):
|
|
||||||
sheet_name = sheet.getAttribute("name")
|
|
||||||
if sheet_name.lower() == "master":
|
|
||||||
continue
|
|
||||||
|
|
||||||
current_parent_id = None
|
|
||||||
for row in sheet.getElementsByType(odf.table.TableRow):
|
|
||||||
cells = row.getElementsByType(odf.table.TableCell)
|
|
||||||
cell_values = []
|
|
||||||
for cell in cells:
|
|
||||||
texts = cell.getElementsByType(odf.text.P)
|
|
||||||
if texts and texts[0].firstChild:
|
|
||||||
cell_values.append(str(texts[0].firstChild.data).strip())
|
|
||||||
else:
|
|
||||||
cell_values.append("")
|
|
||||||
if not cell_values or len(cell_values)<4:
|
|
||||||
continue
|
|
||||||
id_val, unterk, unterunterk, word = cell_values[:4]
|
|
||||||
if id_val:
|
|
||||||
current_parent_id = id_val.strip()
|
|
||||||
if not word:
|
|
||||||
continue
|
|
||||||
key = lemmatize_term(word)
|
|
||||||
norm_dict[key] = {
|
|
||||||
"Name": word.strip(),
|
|
||||||
"ID": current_parent_id,
|
|
||||||
"Sheet": sheet_name,
|
|
||||||
"Unterkategorie": unterk.strip(),
|
|
||||||
"Unterunterkategorie": unterunterk.strip()
|
|
||||||
}
|
|
||||||
log(f"NV_MASTER geladen: {len(norm_dict)} Begriffe")
|
|
||||||
return norm_dict
|
|
||||||
|
|
||||||
# ------------------------
|
|
||||||
# Matching
|
|
||||||
# ------------------------
|
|
||||||
def get_suggestions(term_lemma, norm_dict, top_n=3, threshold=CONF_THRESHOLD):
|
|
||||||
candidates = []
|
|
||||||
for key, entry in norm_dict.items():
|
|
||||||
if RAPIDFUZZ_AVAILABLE:
|
|
||||||
score = fuzz.token_set_ratio(term_lemma, key)/100
|
|
||||||
else:
|
|
||||||
score = SequenceMatcher(None, term_lemma.lower(), key.lower()).ratio()
|
|
||||||
if key.lower().startswith(term_lemma.lower()):
|
|
||||||
score = min(score + 0.1, 1.0)
|
|
||||||
if score >= threshold:
|
|
||||||
candidates.append((score, entry["Name"], entry["ID"]))
|
|
||||||
candidates.sort(reverse=True)
|
|
||||||
return [f"{name} ({id_})" for _, name, id_ in candidates[:top_n]]
|
|
||||||
|
|
||||||
def map_word(word, norm_dict):
|
|
||||||
key = lemmatize_term(word)
|
|
||||||
if key in CACHE:
|
|
||||||
cached = CACHE[key]
|
|
||||||
return cached["Norm"], cached["Suggestion"], cached["ID"]
|
|
||||||
|
|
||||||
if key in norm_dict:
|
|
||||||
entry = norm_dict[key]
|
|
||||||
tr, sug, wid = entry["Name"], "", entry["ID"]
|
|
||||||
else:
|
|
||||||
suggestions = get_suggestions(term_lemma=key, norm_dict=norm_dict)
|
|
||||||
if suggestions:
|
|
||||||
tr, sug, wid = "KEIN TREFFER", ", ".join(suggestions), ""
|
|
||||||
else:
|
|
||||||
tr, sug, wid = "KEIN TREFFER", "", ""
|
|
||||||
|
|
||||||
CACHE[key] = {"Norm": tr, "Suggestion": sug, "ID": wid}
|
|
||||||
return tr, sug, wid
|
|
||||||
|
|
||||||
# ------------------------
|
|
||||||
# Makro-Hauptfunktion
|
|
||||||
# ------------------------
|
|
||||||
def run_mapper_macro():
|
|
||||||
try:
|
|
||||||
doc = XSCRIPTCONTEXT.getDocument()
|
|
||||||
sheets = doc.getSheets()
|
|
||||||
sheet = sheets.getByIndex(0)
|
|
||||||
cursor = sheet.createCursor()
|
|
||||||
cursor.gotoStartOfUsedArea(False)
|
|
||||||
cursor.gotoEndOfUsedArea(True)
|
|
||||||
data_range = cursor.getRangeAddress()
|
|
||||||
|
|
||||||
header_row = 0
|
|
||||||
objekt_col = None
|
|
||||||
|
|
||||||
# Header prüfen
|
|
||||||
for col in range(data_range.EndColumn+1):
|
|
||||||
val = sheet.getCellByPosition(col, header_row).String.strip().lower()
|
|
||||||
if val == "objektbeschreibung":
|
|
||||||
objekt_col = col
|
|
||||||
break
|
|
||||||
|
|
||||||
if objekt_col is None:
|
|
||||||
log("Spalte 'Objektbeschreibung' nicht gefunden")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Neue Spalten am rechten Tabellenende erstellen
|
|
||||||
max_col = data_range.EndColumn
|
|
||||||
norm_tr_col = max_col + 1
|
|
||||||
norm_sug_col = max_col + 2
|
|
||||||
norm_id_col = max_col + 3
|
|
||||||
|
|
||||||
sheet.getCellByPosition(norm_tr_col, header_row).String = "Norm_Treffer"
|
|
||||||
sheet.getCellByPosition(norm_sug_col, header_row).String = "Norm_Vorschlag"
|
|
||||||
sheet.getCellByPosition(norm_id_col, header_row).String = "Norm_ID"
|
|
||||||
|
|
||||||
norm_dict = load_nv_master(NV_MASTER_PATH)
|
|
||||||
|
|
||||||
# Farben
|
|
||||||
GREEN = 0xC6EFCE
|
|
||||||
YELLOW = 0xFFEB9C
|
|
||||||
RED = 0xFFC7CE
|
|
||||||
|
|
||||||
for row in range(1, data_range.EndRow+1):
|
|
||||||
cell = sheet.getCellByPosition(objekt_col, row)
|
|
||||||
val = cell.String.strip()
|
|
||||||
if not val:
|
|
||||||
continue
|
|
||||||
words = [w.strip() for w in re.split(r"\s+", val) if w.strip() and w.lower() not in STOPWORDS]
|
|
||||||
tr_list, sug_list, id_list = [], [], []
|
|
||||||
for w in words:
|
|
||||||
tr, sug, wid = map_word(w, norm_dict)
|
|
||||||
if tr != "KEIN TREFFER":
|
|
||||||
tr_list.append(tr)
|
|
||||||
if sug:
|
|
||||||
sug_list.append(sug)
|
|
||||||
if wid:
|
|
||||||
id_list.append(wid)
|
|
||||||
sheet.getCellByPosition(norm_tr_col, row).String = ", ".join(tr_list)
|
|
||||||
sheet.getCellByPosition(norm_sug_col, row).String = ", ".join(sug_list)
|
|
||||||
sheet.getCellByPosition(norm_id_col, row).String = ", ".join(id_list)
|
|
||||||
# Farbmarkierung
|
|
||||||
if tr_list:
|
|
||||||
cell.CellBackColor = GREEN
|
|
||||||
elif sug_list:
|
|
||||||
cell.CellBackColor = YELLOW
|
|
||||||
else:
|
|
||||||
cell.CellBackColor = RED
|
|
||||||
|
|
||||||
# Cache speichern
|
|
||||||
with open(CACHE_FILE, "w", encoding="utf-8") as f:
|
|
||||||
json.dump(CACHE, f, ensure_ascii=False, indent=2)
|
|
||||||
|
|
||||||
log("Makro erfolgreich ausgeführt")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
log("Fehler in run_mapper_macro:")
|
|
||||||
log(traceback.format_exc())
|
|
||||||
@ -1,297 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
import os
|
|
||||||
import uno
|
|
||||||
import unohelper
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
import pandas as pd
|
|
||||||
from pathlib import Path
|
|
||||||
from difflib import SequenceMatcher
|
|
||||||
|
|
||||||
# RapidFuzz für Fuzzy-Suche
|
|
||||||
try:
|
|
||||||
from rapidfuzz import fuzz
|
|
||||||
RAPIDFUZZ_AVAILABLE = True
|
|
||||||
except ImportError:
|
|
||||||
RAPIDFUZZ_AVAILABLE = False
|
|
||||||
|
|
||||||
# Spacy Lemmatizer
|
|
||||||
try:
|
|
||||||
import spacy
|
|
||||||
nlp = spacy.load("de_core_news_sm")
|
|
||||||
SPACY_AVAILABLE = True
|
|
||||||
except:
|
|
||||||
SPACY_AVAILABLE = False
|
|
||||||
nlp = None
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Pfade & Config
|
|
||||||
# =========================
|
|
||||||
SCRIPT_DIR = Path("/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro")
|
|
||||||
NV_MASTER_FILE = SCRIPT_DIR / "NV_MASTER.ods"
|
|
||||||
CACHE_FILE = SCRIPT_DIR / "mapper_cache.json"
|
|
||||||
LOG_FILE = SCRIPT_DIR / "mapper_log.txt"
|
|
||||||
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Cache & Logging
|
|
||||||
# =========================
|
|
||||||
if CACHE_FILE.exists():
|
|
||||||
with open(CACHE_FILE,"r",encoding="utf-8") as f:
|
|
||||||
CACHE = json.load(f)
|
|
||||||
else:
|
|
||||||
CACHE = {}
|
|
||||||
|
|
||||||
def save_cache():
|
|
||||||
with open(CACHE_FILE,"w",encoding="utf-8") as f:
|
|
||||||
json.dump(CACHE, f, indent=2, ensure_ascii=False)
|
|
||||||
|
|
||||||
def log(msg):
|
|
||||||
with open(LOG_FILE,"a",encoding="utf-8") as f:
|
|
||||||
f.write(msg + "\n")
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Textverarbeitung
|
|
||||||
# =========================
|
|
||||||
def normalize_text(s):
|
|
||||||
if not s: return ""
|
|
||||||
s = str(s).lower().strip()
|
|
||||||
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
|
|
||||||
s = re.sub(r"\s+"," ",s)
|
|
||||||
return s
|
|
||||||
|
|
||||||
lemma_cache = {}
|
|
||||||
def lemmatize_term(term):
|
|
||||||
term_norm = normalize_text(term)
|
|
||||||
if term_norm in lemma_cache:
|
|
||||||
return lemma_cache[term_norm]
|
|
||||||
if SPACY_AVAILABLE and nlp:
|
|
||||||
doc = nlp(term_norm)
|
|
||||||
lemma = " ".join([token.lemma_ for token in doc])
|
|
||||||
else:
|
|
||||||
lemma = term_norm
|
|
||||||
lemma_cache[term_norm] = lemma
|
|
||||||
return lemma
|
|
||||||
|
|
||||||
def compound_split(term):
|
|
||||||
parts = re.findall(r'[A-ZÄÖÜa-zäöü]+', term)
|
|
||||||
return parts if parts else [term]
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# NV_MASTER laden
|
|
||||||
# =========================
|
|
||||||
def load_normvokabular(file_path):
|
|
||||||
sheets = pd.read_excel(file_path, sheet_name=None, engine="odf")
|
|
||||||
norm_dict = {}
|
|
||||||
for sheet_name, df in sheets.items():
|
|
||||||
df = df.dropna(how="all", axis=1)
|
|
||||||
df.columns = [str(c).strip() for c in df.columns]
|
|
||||||
if "ID" not in df.columns or "Wort/Vokabel" not in df.columns:
|
|
||||||
continue
|
|
||||||
current_parent_id = None
|
|
||||||
for _, row in df.iterrows():
|
|
||||||
row_id = str(row["ID"]).strip() if pd.notna(row["ID"]) else None
|
|
||||||
row_word = str(row["Wort/Vokabel"]).strip() if pd.notna(row["Wort/Vokabel"]) else None
|
|
||||||
if row_id: current_parent_id = row_id
|
|
||||||
if not row_word: continue
|
|
||||||
norm_dict[normalize_text(row_word)] = {
|
|
||||||
"ID": current_parent_id,
|
|
||||||
"Wort/Vokabel": row_word
|
|
||||||
}
|
|
||||||
return norm_dict
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Mapping
|
|
||||||
# =========================
|
|
||||||
def map_term_with_indexes(term, norm_dict):
|
|
||||||
term_norm = normalize_text(term)
|
|
||||||
term_lemma = lemmatize_term(term_norm)
|
|
||||||
|
|
||||||
# Cache prüfen
|
|
||||||
if term_lemma in CACHE:
|
|
||||||
cached = CACHE[term_lemma]
|
|
||||||
if isinstance(cached, dict) and all(k in cached for k in ("hits","suggestions","ids")):
|
|
||||||
return cached["hits"], cached["suggestions"], cached["ids"]
|
|
||||||
else:
|
|
||||||
CACHE.pop(term_lemma, None)
|
|
||||||
|
|
||||||
hits = []
|
|
||||||
suggestions = []
|
|
||||||
ids = []
|
|
||||||
|
|
||||||
# Exakte Treffer
|
|
||||||
if term_norm in norm_dict:
|
|
||||||
e = norm_dict[term_norm]
|
|
||||||
hits.append(e["Wort/Vokabel"])
|
|
||||||
ids.append(e["ID"])
|
|
||||||
elif term_lemma in norm_dict:
|
|
||||||
e = norm_dict[term_lemma]
|
|
||||||
hits.append(e["Wort/Vokabel"])
|
|
||||||
ids.append(e["ID"])
|
|
||||||
else:
|
|
||||||
# Fuzzy Matching
|
|
||||||
for key, e in norm_dict.items():
|
|
||||||
score = fuzz.token_sort_ratio(term_lemma, key)/100.0 if RAPIDFUZZ_AVAILABLE else SequenceMatcher(None, term_lemma, key).ratio()
|
|
||||||
if score >= 0.75:
|
|
||||||
suggestions.append(e["Wort/Vokabel"])
|
|
||||||
ids.append(e["ID"])
|
|
||||||
|
|
||||||
CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}
|
|
||||||
return hits, suggestions, ids
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# LibreOffice Dialog (ListBox + Checkbox)
|
|
||||||
# =========================
|
|
||||||
def apply_proposals_dialog():
|
|
||||||
ctx = uno.getComponentContext()
|
|
||||||
smgr = ctx.ServiceManager
|
|
||||||
desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
|
|
||||||
doc = desktop.getCurrentComponent()
|
|
||||||
if not doc.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
|
|
||||||
log("Kein Calc-Dokument aktiv")
|
|
||||||
return
|
|
||||||
selection = doc.CurrentSelection
|
|
||||||
sheet = doc.CurrentController.ActiveSheet
|
|
||||||
|
|
||||||
# Prüfen ob eine Zelle ausgewählt ist
|
|
||||||
if selection is None or not hasattr(selection, "getCellAddress"):
|
|
||||||
log("Keine Zelle ausgewählt")
|
|
||||||
return
|
|
||||||
cell = selection
|
|
||||||
|
|
||||||
# Spalte überprüfen
|
|
||||||
header_row = sheet.getCellRangeByPosition(0,0,sheet.Columns.Count-1,0)
|
|
||||||
objekt_col = None
|
|
||||||
norm_vorschlag_col = None
|
|
||||||
for col_idx in range(sheet.Columns.Count):
|
|
||||||
val = sheet.getCellByPosition(col_idx,0).String
|
|
||||||
if val.strip().lower() == "objektbeschreibung":
|
|
||||||
objekt_col = col_idx
|
|
||||||
elif val.strip().lower() == "norm_vorschlag":
|
|
||||||
norm_vorschlag_col = col_idx
|
|
||||||
if norm_vorschlag_col is None or objekt_col is None:
|
|
||||||
log("Spalte 'Norm_Vorschlag' oder 'Objektbeschreibung' nicht gefunden")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Vorschläge auslesen
|
|
||||||
proposals_str = sheet.getCellByPosition(norm_vorschlag_col, cell.RangeAddress.StartRow).String
|
|
||||||
if not proposals_str.strip():
|
|
||||||
log("Keine Vorschläge in der ausgewählten Zelle")
|
|
||||||
return
|
|
||||||
proposals = [p.strip() for p in proposals_str.split(";") if p.strip()]
|
|
||||||
|
|
||||||
# Dialog erstellen
|
|
||||||
toolkit = smgr.createInstanceWithContext("com.sun.star.awt.Toolkit", ctx)
|
|
||||||
dialog_model = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialogModel", ctx)
|
|
||||||
dialog_model.Width = 180
|
|
||||||
dialog_model.Height = 150
|
|
||||||
dialog_model.Title = "Vorschläge übernehmen"
|
|
||||||
|
|
||||||
# ListBox
|
|
||||||
lb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlListBoxModel")
|
|
||||||
lb_model.Name = "ProposalList"
|
|
||||||
lb_model.PositionX = 10
|
|
||||||
lb_model.PositionY = 10
|
|
||||||
lb_model.Width = 160
|
|
||||||
lb_model.Height = 80
|
|
||||||
lb_model.StringItemList = tuple(proposals)
|
|
||||||
dialog_model.insertByName("ProposalList", lb_model)
|
|
||||||
|
|
||||||
# Checkbox
|
|
||||||
cb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlCheckBoxModel")
|
|
||||||
cb_model.Name = "AllCheck"
|
|
||||||
cb_model.PositionX = 10
|
|
||||||
cb_model.PositionY = 95
|
|
||||||
cb_model.Width = 160
|
|
||||||
cb_model.Height = 15
|
|
||||||
cb_model.Label = "Alle Vorschläge übernehmen"
|
|
||||||
dialog_model.insertByName("AllCheck", cb_model)
|
|
||||||
|
|
||||||
# OK-Button
|
|
||||||
btn_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
|
|
||||||
btn_model.Name = "OKButton"
|
|
||||||
btn_model.PositionX = 10
|
|
||||||
btn_model.PositionY = 115
|
|
||||||
btn_model.Width = 80
|
|
||||||
btn_model.Height = 20
|
|
||||||
btn_model.Label = "OK"
|
|
||||||
dialog_model.insertByName("OKButton", btn_model)
|
|
||||||
|
|
||||||
# Abbrechen-Button
|
|
||||||
cancel_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
|
|
||||||
cancel_model.Name = "CancelButton"
|
|
||||||
cancel_model.PositionX = 100
|
|
||||||
cancel_model.PositionY = 115
|
|
||||||
cancel_model.Width = 80
|
|
||||||
cancel_model.Height = 20
|
|
||||||
cancel_model.Label = "Abbrechen"
|
|
||||||
dialog_model.insertByName("CancelButton", cancel_model)
|
|
||||||
|
|
||||||
# Control Dialog
|
|
||||||
dialog = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialog", ctx)
|
|
||||||
dialog.setModel(dialog_model)
|
|
||||||
dialog.setVisible(True)
|
|
||||||
toolkit.createPeer(dialog, None)
|
|
||||||
|
|
||||||
# Warten auf OK
|
|
||||||
while True:
|
|
||||||
import time
|
|
||||||
time.sleep(0.1)
|
|
||||||
# Prüfen auf Klick
|
|
||||||
if dialog.getControl("OKButton").Pressed:
|
|
||||||
all_flag = dialog.getControl("AllCheck").State == 1
|
|
||||||
selected_idx = dialog.getControl("ProposalList").SelectedItems
|
|
||||||
if selected_idx:
|
|
||||||
selected_proposal = proposals[selected_idx[0]]
|
|
||||||
else:
|
|
||||||
selected_proposal = None
|
|
||||||
break
|
|
||||||
elif dialog.getControl("CancelButton").Pressed:
|
|
||||||
dialog.endExecute()
|
|
||||||
return
|
|
||||||
|
|
||||||
# Anwenden
|
|
||||||
obj_cell = sheet.getCellByPosition(objekt_col, cell.RangeAddress.StartRow)
|
|
||||||
obj_text = obj_cell.String
|
|
||||||
if all_flag:
|
|
||||||
for prop in proposals:
|
|
||||||
idx = obj_text.lower().find(prop.lower())
|
|
||||||
if idx != -1:
|
|
||||||
obj_text = obj_text[:idx] + prop + obj_text[idx+len(prop):]
|
|
||||||
else:
|
|
||||||
if selected_proposal:
|
|
||||||
idx = obj_text.lower().find(selected_proposal.lower())
|
|
||||||
if idx != -1:
|
|
||||||
obj_text = obj_text[:idx] + selected_proposal + obj_text[idx+len(selected_proposal):]
|
|
||||||
|
|
||||||
obj_cell.String = obj_text
|
|
||||||
obj_cell.CellBackColor = 0x00FF00 # grün
|
|
||||||
dialog.endExecute()
|
|
||||||
save_cache()
|
|
||||||
log(f"Vorschlag übernommen: {obj_text}")
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Automatische Button-Registrierung
|
|
||||||
# =========================
|
|
||||||
def register_toolbar_button():
|
|
||||||
ctx = uno.getComponentContext()
|
|
||||||
smgr = ctx.ServiceManager
|
|
||||||
desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
|
|
||||||
doc = desktop.getCurrentComponent()
|
|
||||||
frame = doc.CurrentController.Frame
|
|
||||||
# Button kann manuell über Makro-Menü an Toolbar gebunden werden
|
|
||||||
# Hier wird nur das Makro selbst registriert
|
|
||||||
# Symbolleiste muss in LO einmalig erstellt werden
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Hauptmakro
|
|
||||||
# =========================
|
|
||||||
def run_mapper_macro():
|
|
||||||
try:
|
|
||||||
norm_dict = load_normvokabular(NV_MASTER_FILE)
|
|
||||||
log(f"NV_MASTER geladen ({len(norm_dict)} Begriffe)")
|
|
||||||
|
|
||||||
apply_proposals_dialog()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
log(f"Fehler in run_mapper_macro: {e}")
|
|
||||||
379
mapper_macro_2.3.py
Normal file
@ -0,0 +1,379 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# LibreOffice/Excel Macro: NV_MASTER-Abgleich, Pandas+odf, Cache, Farben
|
||||||
|
# Version 2.3 – mit "Kein_Treffer" Spalte
|
||||||
|
# Speicherort: libreoffice/4/user/Scripts/python/Vokabular_Abgleich_Makro/mapper_macro_2.3.py
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
# UNO-Context wird zur Laufzeit zur Verfügung gestellt (XSCRIPTCONTEXT)
|
||||||
|
try:
|
||||||
|
import pandas as pd
|
||||||
|
PANDAS_AVAILABLE = True
|
||||||
|
except Exception:
|
||||||
|
PANDAS_AVAILABLE = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
import spacy
|
||||||
|
nlp = spacy.load("de_core_news_sm")
|
||||||
|
SPACY_AVAILABLE = True
|
||||||
|
except Exception:
|
||||||
|
SPACY_AVAILABLE = False
|
||||||
|
nlp = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
from rapidfuzz import fuzz
|
||||||
|
RAPIDFUZZ_AVAILABLE = True
|
||||||
|
except Exception:
|
||||||
|
RAPIDFUZZ_AVAILABLE = False
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
|
||||||
|
# ------------------------
|
||||||
|
# Konfiguration
|
||||||
|
# ------------------------
|
||||||
|
BASE_DIR = "/home/jarnold/.config/libreoffice/4/user/Scripts/python/Vokabular_Abgleich_Makro"
|
||||||
|
NV_MASTER_PATH = os.path.join(BASE_DIR, "NV_MASTER.ods")
|
||||||
|
LOG_FILE = os.path.join(BASE_DIR, "mapper_macro_2.3.log")
|
||||||
|
CACHE_FILE = os.path.join(BASE_DIR, "mapper_cache_2.3.json")
|
||||||
|
|
||||||
|
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
|
||||||
|
CONF_THRESHOLD = 0.75 # Basis-Schwelle für Vorschläge
|
||||||
|
|
||||||
|
# ------------------------
# Logging
# ------------------------
def log(msg):
    try:
        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(msg + "\n")
    except Exception:
        pass
# ------------------------
# Load cache
# ------------------------
try:
    if os.path.exists(CACHE_FILE):
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            CACHE = json.load(f)
    else:
        CACHE = {}
except Exception:
    CACHE = {}
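# For orientation, the cache file mirrors what map_term_with_indexes (below)
# stores per lemmatized term; a hypothetical mapper_cache_2.3.json could read:
#   {
#     "schale":     {"hits": ["Schale"], "suggestions": [], "ids": ["4123"]},
#     "holzdeckel": {"hits": [], "suggestions": ["Deckel (4711)"], "ids": []}
#   }
# The IDs here are invented; real values come out of NV_MASTER.ods.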
# ------------------------
# Text normalization & lemmatization
# ------------------------
def normalize_text(s):
    if not s:
        return ""
    s = str(s).strip().lower()
    s = re.sub(r"[\(\)\[\]\"'\\;:\?!,\.]", "", s)
    s = re.sub(r"\s+", " ", s)
    return s


lemma_cache = {}

def lemmatize_term(term):
    term_norm = normalize_text(term)
    if term_norm in lemma_cache:
        return lemma_cache[term_norm]
    if SPACY_AVAILABLE and nlp:
        try:
            doc = nlp(term_norm)
            lemma = " ".join([token.lemma_ for token in doc])
        except Exception:
            lemma = term_norm
    else:
        lemma = term_norm
    lemma_cache[term_norm] = lemma
    return lemma
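# Example (illustrative): normalize_text("Schale, (Holz)") -> "schale holz".
# lemmatize_term additionally reduces inflected forms when spaCy and the
# de_core_news_sm model are loaded, so "schalen" would typically come back as
# "schale"; without spaCy the normalized string is returned unchanged.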
# ------------------------
# Load NV_MASTER
# ------------------------
def build_norm_index(nv_path):
    norm_dict = {}
    lemma_index = {}
    if not PANDAS_AVAILABLE:
        log("Pandas nicht verfügbar. NV_MASTER kann nicht gelesen werden.")
        return norm_dict, lemma_index
    try:
        sheets = pd.read_excel(nv_path, sheet_name=None, engine="odf")
    except Exception as e:
        log(f"Fehler beim Einlesen NV_MASTER: {e}")
        return norm_dict, lemma_index
    for sheet_name, df in sheets.items():
        if str(sheet_name).strip().lower() == "master":
            continue
        df = df.fillna("")
        cols = [str(c).strip().lower() for c in df.columns]
        id_col = None
        word_col = None
        for i, c in enumerate(cols):
            if "id" in c:
                id_col = df.columns[i]
            if "wort" in c or "vokabel" in c:
                word_col = df.columns[i]
        if word_col is None and len(df.columns) >= 1:
            word_col = df.columns[-1]
        if id_col is None and len(df.columns) >= 1:
            id_col = df.columns[0]
        current_parent_id = None
        for _, row in df.iterrows():
            id_val = str(row[id_col]).strip() if id_col in df.columns else ""
            word_val = str(row[word_col]).strip() if word_col in df.columns else ""
            if id_val:
                current_parent_id = id_val
            if not word_val:
                continue
            norm_name = normalize_text(word_val)
            lemma = lemmatize_term(word_val)
            entry = {"Name": word_val.strip(), "ID": current_parent_id or "", "Sheet": sheet_name}
            norm_dict.setdefault(norm_name, []).append(entry)
            lemma_index.setdefault(lemma, []).append(entry)
    log(f"NV_MASTER geladen ({NV_MASTER_PATH}). Begriffe: {sum(len(v) for v in norm_dict.values())}")
    return norm_dict, lemma_index
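# Index shape (illustrative, with invented values): a row "4123 | Schale" on a
# sheet named "Gefaesse" produces
#   norm_dict["schale"]   == [{"Name": "Schale", "ID": "4123", "Sheet": "Gefaesse"}]
#   lemma_index["schale"] == [the same entry]
# Rows whose ID cell is empty inherit the last non-empty ID above them
# (current_parent_id), so sub-terms share their parent's ID.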
# ------------------------
# Matching
# ------------------------
def fuzzy_score(a, b):
    if RAPIDFUZZ_AVAILABLE:
        try:
            return fuzz.token_set_ratio(a, b) / 100.0
        except Exception:
            return 0.0
    else:
        try:
            return SequenceMatcher(None, a.lower(), b.lower()).ratio()
        except Exception:
            return 0.0
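# Rough behavior (exact values depend on the active backend): identical
# strings score 1.0, e.g. fuzzy_score("schale", "schale") == 1.0, while
# fuzzy_score("holzschale", "schale") lands around 0.75 with either backend,
# right at CONF_THRESHOLD. get_suggestions_for_term below adds a +0.1 bonus
# only when a vocabulary key starts with the searched term.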
def get_suggestions_for_term(term_lemma, norm_dict, lemma_index, threshold=CONF_THRESHOLD):
    candidates = []
    for key_lemma, entries in lemma_index.items():
        score = fuzzy_score(term_lemma, key_lemma)
        if key_lemma.startswith(term_lemma):
            score = min(score + 0.1, 1.0)
        if score >= threshold:
            for e in entries:
                candidates.append((score, e["Name"], e["ID"]))
    for norm_key, entries in norm_dict.items():
        score = fuzzy_score(term_lemma, norm_key)
        if norm_key.startswith(term_lemma):
            score = min(score + 0.1, 1.0)
        if score >= threshold:
            for e in entries:
                candidates.append((score, e["Name"], e["ID"]))
    candidates.sort(key=lambda t: t[0], reverse=True)
    seen = set()
    results = []
    for score, name, id_ in candidates:
        key = (name, id_)
        if key in seen:
            continue
        seen.add(key)
        results.append({"score": score, "name": name, "id": id_})
    return [f'{r["name"]} ({r["id"]})' if r["id"] else r["name"] for r in results]
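# Return format (illustrative): suggestions are display strings, best score
# first and de-duplicated, e.g. ["Schale (4123)", "Schalenfragment"]; the
# "(ID)" suffix appears only for entries that carry an ID in NV_MASTER.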
def map_term_with_indexes(term, norm_dict, lemma_index):
    term_norm = normalize_text(term)
    term_lemma = lemmatize_term(term)
    if term_lemma in CACHE:
        cached = CACHE[term_lemma]
        return cached.get("hits", []), cached.get("suggestions", []), cached.get("ids", [])
    hits = []
    suggestions = []
    ids = []
    if term_norm in norm_dict:
        for e in norm_dict[term_norm]:
            hits.append(e["Name"])
            if e["ID"]:
                ids.append(e["ID"])
    if not hits and term_lemma in lemma_index:
        for e in lemma_index[term_lemma]:
            hits.append(e["Name"])
            if e["ID"]:
                ids.append(e["ID"])
    if not hits:
        suggestions = get_suggestions_for_term(term_lemma, norm_dict, lemma_index, threshold=CONF_THRESHOLD)

    def unique_preserve(seq):
        seen = set()
        out = []
        for x in seq:
            if x not in seen:
                seen.add(x)
                out.append(x)
        return out

    hits = unique_preserve(hits)
    suggestions = unique_preserve(suggestions)
    ids = unique_preserve(ids)
    CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}
    return hits, suggestions, ids
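# Lookup order, summarized: exact match on the normalized term first, then on
# the lemma, and only if both fail a fuzzy suggestion list is built. With
# invented data a call might return
#   map_term_with_indexes("Schale", ...)     -> (["Schale"], [], ["4123"])
#   map_term_with_indexes("Holzdeckel", ...) -> ([], ["Deckel (4711)"], [])
# Results are memoized in CACHE, keyed by the lemma.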
# ------------------------
# Main macro
# ------------------------
def run_mapper_macro():
    try:
        doc = XSCRIPTCONTEXT.getDocument()
        sheet = doc.CurrentController.ActiveSheet
        cursor = sheet.createCursor()
        cursor.gotoStartOfUsedArea(False)
        cursor.gotoEndOfUsedArea(True)
        data_range = cursor.getRangeAddress()
    except Exception as e:
        log("Fehler: konnte Dokument/Sheet nicht öffnen: " + str(e))
        return

    # Find the header
    header_row = None
    objekt_col = None
    max_col = data_range.EndColumn
    for r in range(0, min(5, data_range.EndRow+1)):
        for c in range(0, max_col+1):
            try:
                val = str(sheet.getCellByPosition(c, r).String).strip().lower()
            except Exception:
                val = ""
            if val == "objektbeschreibung":
                header_row = r
                objekt_col = c
                break
        if objekt_col is not None:
            break
    if objekt_col is None:
        log("Spalte 'Objektbeschreibung' nicht gefunden. Abbruch.")
        return
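    # The header search above scans only the first five rows of the sheet, so a
    # minimal working layout (column names invented) looks like:
    #   row 1: Inventarnummer | Objektbeschreibung | ...
    #   row 2: 1901/23        | Holzschale mit Deckel, gedrechselt
    # A sheet whose "Objektbeschreibung" header sits lower than row 5 is
    # reported as "nicht gefunden" and the macro aborts.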
    # Create / find the result columns
    existing = {}
    for c in range(0, data_range.EndColumn+1):
        try:
            h = str(sheet.getCellByPosition(c, header_row).String).strip()
        except Exception:
            h = ""
        if h == "Norm_Treffer":
            existing["Norm_Treffer"] = c
        if h == "Norm_Vorschlag":
            existing["Norm_Vorschlag"] = c
        if h == "Kein_Treffer":
            # also recognize an existing Kein_Treffer column so a rerun does
            # not append a duplicate
            existing["Kein_Treffer"] = c

    last_col = data_range.EndColumn
    if "Norm_Treffer" not in existing:
        last_col += 1
        existing["Norm_Treffer"] = last_col
        sheet.getCellByPosition(last_col, header_row).String = "Norm_Treffer"
    if "Norm_Vorschlag" not in existing:
        last_col += 1
        existing["Norm_Vorschlag"] = last_col
        sheet.getCellByPosition(last_col, header_row).String = "Norm_Vorschlag"

    # New column "Kein_Treffer"
    if "Kein_Treffer" not in existing:
        last_col += 1
        existing["Kein_Treffer"] = last_col
        sheet.getCellByPosition(last_col, header_row).String = "Kein_Treffer"

    norm_tr_col = existing["Norm_Treffer"]
    norm_sug_col = existing["Norm_Vorschlag"]
    kein_tr_col = existing["Kein_Treffer"]

    norm_dict, lemma_index = build_norm_index(NV_MASTER_PATH)
    if not norm_dict and not lemma_index:
        log("NV_MASTER leer oder nicht lesbar. Abbruch.")
        return
    GREEN = 0xADFF2F
    YELLOW = 0xFFA500
    RED = 0xCC0000
    WHITE = 0xFFFFFF

    rows_processed = 0
    for r in range(header_row + 1, data_range.EndRow + 1):
        try:
            cell = sheet.getCellByPosition(objekt_col, r)
            txt = str(cell.String).strip()
            if not txt:
                continue

            clauses = [c.strip() for c in re.split(r",", txt) if c.strip()]
            terms = []
            for cl in clauses:
                parts = [p.strip() for p in re.split(r"\s+", cl) if p.strip()]
                for p in parts:
                    if p.lower() in STOPWORDS:
                        continue
                    if re.fullmatch(r"\d+", p):
                        continue
                    terms.append(p)
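            # Term extraction, illustrated on an invented cell value:
            #   "Holzschale mit Deckel, 19. Jh."
            # is split on commas, then on whitespace; stopwords ("mit") and
            # purely numeric tokens are dropped, which leaves
            #   ["Holzschale", "Deckel", "19.", "Jh."]
            # ("19." survives because of its trailing dot).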
            row_hits = []
            row_sugs = []
            row_ids = []  # collected but not written anywhere in this version
            unmapped_terms = []

            for term in terms:
                hits, sugs, ids = map_term_with_indexes(term, norm_dict, lemma_index)
                if hits:
                    row_hits.extend([f"{h} ({id_})" if id_ else h for h, id_ in zip(hits, ids + [""]*len(hits))])
                else:
                    unmapped_terms.append(term)
                if sugs:
                    row_sugs.extend([f"{s}" for s in sugs])
                if ids:
                    row_ids.extend(ids)

            def uniq(seq):
                seen = set()
                out = []
                for x in seq:
                    if x not in seen:
                        seen.add(x)
                        out.append(x)
                return out

            row_hits = uniq(row_hits)
            row_sugs = uniq(row_sugs)
            unmapped_terms = uniq(unmapped_terms)

            # Color logic for the Objektbeschreibung cell
            if terms and not unmapped_terms and row_hits:
                cell.CellBackColor = GREEN
                row_sugs = []  # no suggestions when everything matched
            elif row_hits:
                cell.CellBackColor = YELLOW
            else:
                cell.CellBackColor = RED
            # Norm_Treffer
            tr_cell = sheet.getCellByPosition(norm_tr_col, r)
            tr_cell.String = " | ".join(row_hits)
            tr_cell.CellBackColor = GREEN if row_hits else WHITE

            # Norm_Vorschlag
            sug_cell = sheet.getCellByPosition(norm_sug_col, r)
            sug_cell.String = " | ".join(row_sugs)
            sug_cell.CellBackColor = YELLOW if row_sugs else WHITE

            # Kein_Treffer
            kt_cell = sheet.getCellByPosition(kein_tr_col, r)
            kt_cell.String = " | ".join(unmapped_terms)
            kt_cell.CellBackColor = RED if unmapped_terms else WHITE

            rows_processed += 1

        except Exception as e:
            log(f"Fehler in Zeile {r}: {e}\n{traceback.format_exc()}")

    try:
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(CACHE, f, ensure_ascii=False, indent=2)
    except Exception:
        pass

    log(f"run_mapper_macro fertig. Zeilen verarbeitet: {rows_processed}")


# Export for LibreOffice
g_exportedScripts = (run_mapper_macro,)
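# Running it (typical setup; menu labels may differ slightly between
# LibreOffice versions): place this file in the Scripts/python folder named in
# BASE_DIR, restart LibreOffice Calc, then Tools > Macros > Run Macro... >
# My Macros > mapper_macro_2.3 > run_mapper_macro. g_exportedScripts is what
# makes run_mapper_macro (and only it) appear in that dialog.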
@@ -1,121 +0,0 @@
import uno
import json
import subprocess
from pathlib import Path
from com.sun.star.awt import XActionListener

# Color values (RGB)
GREEN = 0xC6EFCE
RED = 0xFFC7CE
YELLOW = 0xFFEB9C
def get_objektbeschreibung_column(sheet):
    """Finds the 'Objektbeschreibung' column."""
    for row in range(sheet.Rows.Count):
        for col in range(sheet.Columns.Count):
            cell = sheet.getCellByPosition(col, row)
            if cell.String.strip().lower() == "objektbeschreibung":
                return col
    return None


def update_cell_color(cell, status):
    """Colors the cell according to the match status."""
    if status == "grün":
        cell.CellBackColor = GREEN
    elif status == "gelb":
        cell.CellBackColor = YELLOW
    else:
        cell.CellBackColor = RED
def call_mapper(term):
    """Calls the local wrapper script."""
    wrapper = Path("/home/jarnold/projects/GND-Skript Test/NormVokabular_Mapper_Wrapper.py")
    if not wrapper.exists():
        return {"term": term, "norm_name": "KEIN TREFFER", "norm_id": "", "suggestions": []}

    result = subprocess.run(
        ["python3", str(wrapper), term],
        capture_output=True,
        text=True
    )
    try:
        output = json.loads(result.stdout)
    except Exception:
        output = {"term": term, "norm_name": "KEIN TREFFER", "norm_id": "", "suggestions": []}
    return output
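# The wrapper is expected to print one JSON object per call on stdout; with
# invented values, a reply could look like:
#   {"term": "Schale", "norm_name": "Schale", "norm_id": "4123",
#    "suggestions": ["Schale (4123)", "Holzschale"]}
# "KEIN TREFFER" in norm_name is the sentinel for "no match".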
class SuggestionListener(XActionListener):
    """Listener for clicks on a suggestion button."""
    def __init__(self, cell, suggestion, dialog):
        self.cell = cell
        self.suggestion = suggestion
        self.dialog = dialog

    def actionPerformed(self, event):
        self.cell.String = self.suggestion
        update_cell_color(self.cell, "grün")
        self.dialog.endExecute()  # closes the dialog window

    def disposing(self, event):
        pass
def show_suggestion_dialog(cell, term, suggestions):
    """Shows a dialog window with clickable suggestions."""
    ctx = XSCRIPTCONTEXT.getComponentContext()
    smgr = ctx.getServiceManager()
    toolkit = smgr.createInstance("com.sun.star.awt.Toolkit")
    dialog_model = smgr.createInstance("com.sun.star.awt.UnoControlDialogModel")
    dialog_model.PositionX = 100
    dialog_model.PositionY = 100
    dialog_model.Width = 200
    dialog_model.Height = 30 + 25*len(suggestions)
    dialog_model.Title = f"Vorschläge für '{term}'"

    for i, sugg in enumerate(suggestions[:3]):
        btn_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
        btn_model.Name = f"btn_{i}"
        btn_model.Label = sugg
        btn_model.PositionX = 10
        btn_model.PositionY = 10 + i*25
        btn_model.Width = 180
        btn_model.Height = 20
        dialog_model.insertByName(btn_model.Name, btn_model)

    dialog = smgr.createInstance("com.sun.star.awt.UnoControlDialog")
    dialog.setModel(dialog_model)
    dialog.setVisible(True)

    for i, sugg in enumerate(suggestions[:3]):
        btn = dialog.getControl(f"btn_{i}")
        listener = SuggestionListener(cell, sugg, dialog)
        btn.addActionListener(listener)

    toolkit.createDialog(dialog).execute()
def mapper_process_column():
    """Processes every cell below 'Objektbeschreibung' in the active sheet."""
    doc = XSCRIPTCONTEXT.getDocument()
    sheet = doc.CurrentController.ActiveSheet
    col_index = get_objektbeschreibung_column(sheet)
    if col_index is None:
        return

    for row in range(sheet.Rows.Count):
        cell = sheet.getCellByPosition(col_index, row)
        if not cell.String.strip():
            continue  # skip empty cells
        term = cell.String.strip()
        result = call_mapper(term)

        if result["norm_name"] != "KEIN TREFFER":
            cell.String = result["norm_name"]
            update_cell_color(cell, "grün")
        elif result["suggestions"]:
            update_cell_color(cell, "gelb")
            show_suggestion_dialog(cell, term, result["suggestions"])
        else:
            update_cell_color(cell, "rot")
            show_suggestion_dialog(cell, term, [])


# Export
g_exportedScripts = mapper_process_column,