initial commit

Commit 41e8b7103e by gumuArnold, 2025-10-10 09:46:41 +02:00
15310 changed files with 5698406 additions and 0 deletions

File diff suppressed because it is too large.

BIN  Box Ha-Ho.ods (new file; binary content not shown)
BIN  Box Ha-Klinc.ods (new file; binary content not shown)
BIN  Box Hu-J.ods (new file; binary content not shown)
BIN  Box K - Klinc.ods (new file; binary content not shown)
BIN  Input CSV/Box Ha-Klinc.ods (new file; binary content not shown)

(file name not shown; 1 line added)

@@ -0,0 +1 @@
,jarnold,workPC,10.10.2025 09:26,file:///home/jarnold/.config/libreoffice/4;


Masterfile_Editor.py (new file, 212 lines)

@@ -0,0 +1,212 @@
import os
import re
import logging
import pandas as pd
import ezodf
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment
# -------------------------------------------------
# KONFIGURATION
# -------------------------------------------------
INPUT_FILE = r"/home/jarnold/projects/GND-Skript Test/Input CSV/Normvokabular_INTERN/NV_MASTER.ods"
OUTPUT_FILE = r"/home/jarnold/projects/GND-Skript Test/Normvokabular_INTERN/NV_MASTER_Updated.ods"
MASTER_SHEET_NAME = "Masterstruktur"
SHEET_ORDER = [
"Masterstruktur",
"1 Figur",
"2 Objekt",
"3 Flora",
"4 Fauna",
"5 Landschaft",
"6 Phänomene, Erscheinungen",
"7 Architektur",
"8 Verzierungen, Ornamentik",
"9 Aktivität, Handlung, Pose"
]
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# -------------------------------------------------
# HELFERFUNKTIONEN
# -------------------------------------------------
def detect_id_and_name(df):
df_cols = [str(c).strip().lower() for c in df.columns]
id_col, name_col = None, None
for idx, col in enumerate(df_cols):
if col == "id":
id_col = df.columns[idx]
elif col in ["name", "wort", "wort/vokabel"]:
name_col = df.columns[idx]
if id_col is None or name_col is None:
logging.warning(f"Sheet hat keine ID oder Name/Wort-Spalte: {df.columns}")
return id_col, name_col
def parse_id_level(id_val):
if pd.isna(id_val):
return None
id_str = str(id_val).strip()
if re.match(r'^\d+(\.\d+){0,2}$', id_str):
return len(id_str.split("."))
return None
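# Illustration (hypothetical IDs, not taken from NV_MASTER.ods): parse_id_level maps
# dotted hierarchy IDs to their depth and everything else to None, e.g.
#   parse_id_level("2")     -> 1   (category)
#   parse_id_level("2.1")   -> 2   (Unterkategorie)
#   parse_id_level("2.1.3") -> 3   (Unterunterkategorie)
#   parse_id_level("Vase")  -> None (plain vocabulary row)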
def process_category_df(df, sheet_name):
id_col, name_col = detect_id_and_name(df)
if id_col is None or name_col is None:
return None
current_level = {2: None, 3: None}
new_rows = []
for _, row in df.iterrows():
id_val = row[id_col] if pd.notna(row[id_col]) else ""
name_val = row[name_col] if pd.notna(row[name_col]) else ""
if not id_val and not name_val:
continue
level = parse_id_level(id_val)
if level:
if level >= 2:
current_level[level] = name_val
for deeper in range(level+1, 4):
current_level[deeper] = None
new_rows.append({
"ID": id_val,
"Unterkategorie": current_level[2] if level >= 2 else "",
"Unterunterkategorie": current_level[3] if level >= 3 else "",
"Wort/Vokabel": name_val
})
else:
new_rows.append({
"ID": "",
"Unterkategorie": "",
"Unterunterkategorie": "",
"Wort/Vokabel": name_val
})
df_new = pd.DataFrame(new_rows, columns=["ID", "Unterkategorie", "Unterunterkategorie", "Wort/Vokabel"])
logging.info(f"Sheet '{sheet_name}' verarbeitet: {len(df_new)} Zeilen")
return df_new
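# Minimal sketch of the output of process_category_df(). Illustrative only: the frame
# below is hypothetical sample data, not part of NV_MASTER.ods, and this helper is never
# called by the script itself.
def _demo_process_category_df():
    df = pd.DataFrame({
        "ID": ["2.1", "", ""],
        "Wort/Vokabel": ["Gefäß", "Vase", "Krug"],
    })
    # The first row keeps ID "2.1" and fills Unterkategorie with "Gefäß";
    # "Vase" and "Krug" follow as plain vocabulary rows without an ID.
    return process_category_df(df, "2 Objekt")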
def merge_new_terms(original_df, processed_df):
"""Fügt neue Wörter aus original_df (ohne ID) in processed_df ein, wenn sie noch nicht vorhanden sind."""
_, orig_name_col = detect_id_and_name(original_df)
if orig_name_col is None or orig_name_col not in original_df.columns:
return processed_df
existing_words = set(str(x).strip().lower() for x in processed_df["Wort/Vokabel"].dropna())
new_rows = []
for _, row in original_df.iterrows():
name = str(row.get(orig_name_col, "")).strip()
id_val = str(row.get("ID", "")).strip() if "ID" in row else ""
if not name:
continue
if not id_val and name.lower() not in existing_words:
new_rows.append({"ID": "", "Unterkategorie": "", "Unterunterkategorie": "", "Wort/Vokabel": name})
if new_rows:
df_new = pd.concat([processed_df, pd.DataFrame(new_rows)], ignore_index=True)
logging.info(f"{len(new_rows)} neue Wörter übernommen.")
return df_new
return processed_df
def build_master_df(category_dfs):
seen_ids = set()
master_rows = []
for df in category_dfs:
for _, row in df.iterrows():
id_val = row["ID"]
name_val = row["Wort/Vokabel"]
if id_val and id_val not in seen_ids:
seen_ids.add(id_val)
master_rows.append({"ID": id_val, "Name": name_val})
master_df = pd.DataFrame(master_rows)
logging.info(f"Masterstruktur enthält {len(master_df)} eindeutige IDs")
return master_df
# -------------------------------------------------
# FORMATIERUNG UND SPEICHERN
# -------------------------------------------------
def format_excel_sheet(df, sheet_name, writer):
df.to_excel(writer, sheet_name=sheet_name, index=False)
worksheet = writer.sheets[sheet_name]
for col_idx, col in enumerate(df.columns, 1):
        max_len = max([len(str(cell)) if cell is not None else 0 for cell in df[col]], default=0)  # default=0 tolerates empty columns
        max_len = max(max_len, len(col)) + 2
worksheet.column_dimensions[get_column_letter(col_idx)].width = max_len
for row_idx in range(1, len(df) + 2):
worksheet.cell(row=row_idx, column=col_idx).alignment = Alignment(horizontal='left')
def save_ods(processed_sheets, output_file):
doc = ezodf.newdoc(doctype="ods")
for name, df in processed_sheets.items():
df = df.fillna("")
sheet = ezodf.Sheet(name, size=(len(df) + 1, len(df.columns)))
doc.sheets += sheet
for col_idx, col_name in enumerate(df.columns):
sheet[0, col_idx].set_value(str(col_name))
for row_idx, row in enumerate(df.itertuples(index=False), start=1):
for col_idx, value in enumerate(row):
if value is None or str(value).lower() == "nan":
value = ""
sheet[row_idx, col_idx].set_value(str(value))
doc.saveas(output_file)
logging.info(f"ODS-Datei gespeichert: {output_file}")
# -------------------------------------------------
# HAUPTPROGRAMM
# -------------------------------------------------
def main():
if not os.path.exists(INPUT_FILE):
logging.error(f"Datei {INPUT_FILE} existiert nicht.")
return
ext = os.path.splitext(INPUT_FILE)[1].lower()
engine = None
if ext in [".xlsx", ".xls"]:
engine = "openpyxl"
elif ext == ".ods":
engine = "odf"
else:
logging.error("Nicht unterstütztes Dateiformat")
return
logging.info(f"Lade Datei {INPUT_FILE} mit Engine '{engine}'")
xls = pd.ExcelFile(INPUT_FILE, engine=engine)
processed_sheets = {}
category_dfs = []
for sheet_name in xls.sheet_names:
if sheet_name == MASTER_SHEET_NAME:
continue
df = pd.read_excel(xls, sheet_name=sheet_name, engine=engine)
df_new = process_category_df(df, sheet_name)
if df_new is not None:
df_merged = merge_new_terms(df, df_new)
processed_sheets[sheet_name] = df_merged
category_dfs.append(df_merged)
else:
processed_sheets[sheet_name] = df
master_df = build_master_df(category_dfs)
processed_sheets[MASTER_SHEET_NAME] = master_df
ordered_sheets = {name: processed_sheets[name] for name in SHEET_ORDER if name in processed_sheets}
ext_out = os.path.splitext(OUTPUT_FILE)[1].lower()
if ext_out in [".xlsx", ".xls"]:
with pd.ExcelWriter(OUTPUT_FILE, engine="openpyxl") as writer:
for name, df in ordered_sheets.items():
format_excel_sheet(df, name, writer)
logging.info(f"Excel-Datei gespeichert: {OUTPUT_FILE}")
elif ext_out == ".ods":
save_ods(ordered_sheets, OUTPUT_FILE)
if __name__ == "__main__":
main()

BIN  NV_MASTER.ods (new file; binary content not shown)
BIN  NV_MASTER_Updated.ods (new file; binary content not shown)

NV_Master_EditorFAIL.py (new file, 171 lines)

@@ -0,0 +1,171 @@
import os
import re
import logging
import datetime
import pandas as pd
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment
import ezodf
# ----------------- KONFIGURATION -----------------
INPUT_FILE = r"/home/jarnold/projects/GND-Skript Test/Input CSV/Normvokabular_INTERN/NV_MASTER.ods"
MASTER_SHEET_NAME = "Masterstruktur"
today = datetime.datetime.today().strftime("%y.%m.%d")
base, ext = os.path.splitext(INPUT_FILE)
OUTPUT_FILE = f"{base}_Updated_{today}{ext}"
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# ----------------- HILFSFUNKTIONEN -----------------
def load_file(input_file):
"""
Prüft Dateiformat und gibt für Excel: pd.ExcelFile + Engine zurück,
für ODS: None + "odf" (da ODS direkt über ezodf gelesen wird).
"""
ext = os.path.splitext(input_file)[1].lower()
if ext in [".xlsx", ".xls"]:
engine = "openpyxl"
xls = pd.ExcelFile(input_file, engine=engine)
elif ext == ".ods":
engine = "odf"
xls = None # ODS wird direkt über ezodf gelesen
else:
raise ValueError(f"Nicht unterstütztes Dateiformat: {ext}")
logging.info(f"Lade Datei {input_file} mit Engine '{engine}'")
return xls, engine
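# Usage note (assuming an .ods master file): load_file("NV_MASTER.ods") returns
# (None, "odf") and the sheets are then read with read_ods_sheet() below, whereas
# .xlsx/.xls inputs come back as (pd.ExcelFile, "openpyxl").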
def read_ods_sheet(filename, sheet_name):
"""Liests ODS Sheet sauber ein, inklusive Header."""
doc = ezodf.opendoc(filename)
sheet = doc.sheets[sheet_name]
data = []
headers = [str(sheet[0, col].value).strip() for col in range(sheet.ncols())]
for row_idx in range(1, sheet.nrows()):
row = {}
empty_row = True
for col_idx, col_name in enumerate(headers):
cell_val = sheet[row_idx, col_idx].value
val = "" if cell_val is None else str(cell_val).strip()
row[col_name] = val
if val:
empty_row = False
if not empty_row:
data.append(row)
df = pd.DataFrame(data, columns=headers)
return df
def process_category_sheet(df):
"""Erstellt die treppenartige Hierarchie."""
df = df.copy()
for col in ["ID","Unterkategorie","Unterunterkategorie","Wort/Vokabel"]:
if col not in df.columns:
df[col] = ""
rows = []
current_id = ""
current_uuk = ""
for _, r in df.iterrows():
id_val = str(r.get("ID","")).strip()
uuk_val = str(r.get("Unterunterkategorie","")).strip()
word_val = str(r.get("Wort/Vokabel","")).strip()
if id_val: # Kategoriezeile
current_id = id_val
current_uuk = uuk_val or word_val
rows.append({"ID": current_id, "Unterkategorie": "", "Unterunterkategorie": current_uuk, "Wort/Vokabel": ""})
continue
if uuk_val: # Unterunterkategorie
current_uuk = uuk_val
rows.append({"ID": "", "Unterkategorie": "", "Unterunterkategorie": current_uuk, "Wort/Vokabel": ""})
continue
if word_val: # Vokabel
rows.append({"ID": "", "Unterkategorie": "", "Unterunterkategorie": "", "Wort/Vokabel": word_val})
continue
return pd.DataFrame(rows, columns=["ID","Unterkategorie","Unterunterkategorie","Wort/Vokabel"])
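# Sketch of the staircase layout this produces (hypothetical input rows):
#   ID "3.2" with word "Rose"              -> one category row (ID + Unterunterkategorie filled)
#   empty ID, Unterunterkategorie "Blüte"  -> one sub-subcategory row
#   empty ID, word "Knospe"                -> one vocabulary row (only Wort/Vokabel filled)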
def remove_empty_vocabulary_rows(df):
"""Entfernt Zeilen, die nur leere Wort/Vokabel-Spalte haben."""
return df[df["Wort/Vokabel"].astype(str).str.strip() != ""].copy().reset_index(drop=True)
def sync_master_and_sheets(master_df, category_dfs):
"""Synchronisiert Kategorien nach Master, Vokabeln bleiben erhalten."""
master_df = master_df.copy()
master_df["ID"] = master_df["ID"].astype(str).str.strip()
master_dict = dict(zip(master_df["ID"], master_df["Kategorie"]))
updated_dfs = {}
summary = {}
for sheet_name, df in category_dfs.items():
rows_out = []
changes = {"removed":0}
for _, row in df.iterrows():
id_val = str(row.get("ID","")).strip()
if id_val and id_val not in master_dict:
changes["removed"] +=1
continue
rows_out.append(row.to_dict())
updated_dfs[sheet_name] = pd.DataFrame(rows_out, columns=df.columns)
summary[sheet_name] = changes
new_master = pd.DataFrame([{"ID":k,"Kategorie":v} for k,v in sorted(master_dict.items())])
return new_master, updated_dfs, summary
def save_excel(processed_sheets, output_file):
from openpyxl import Workbook
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
for sheet_name, df in processed_sheets.items():
df.to_excel(writer, sheet_name=sheet_name, index=False)
ws = writer.sheets[sheet_name]
for col_idx, col in enumerate(df.columns,1):
max_len = max(df[col].astype(str).map(len).max() if len(df)>0 else 0,len(col))+2
ws.column_dimensions[get_column_letter(col_idx)].width = max_len
for row_idx in range(1,len(df)+2):
ws.cell(row=row_idx,column=col_idx).alignment = Alignment(horizontal='left')
def save_ods(processed_sheets, output_file):
doc = ezodf.newdoc(doctype="ods", filename=output_file)
for name, df in processed_sheets.items():
sheet = ezodf.Sheet(name, size=(len(df)+1,len(df.columns)))
doc.sheets += sheet
for col_idx, col_name in enumerate(df.columns):
sheet[0,col_idx].set_value(col_name)
for row_idx,row in enumerate(df.itertuples(index=False),start=1):
for col_idx,value in enumerate(row):
sheet[row_idx,col_idx].set_value("" if pd.isna(value) else value)
doc.save()
# ----------------- HAUPTPROGRAMM -----------------
def main():
xls, engine = load_file(INPUT_FILE)
if engine == "odf":
doc = ezodf.opendoc(INPUT_FILE)
sheet_names = [s.name for s in doc.sheets if s.name != MASTER_SHEET_NAME]
category_dfs = {name: process_category_sheet(read_ods_sheet(INPUT_FILE,name)) for name in sheet_names}
master_df = read_ods_sheet(INPUT_FILE, MASTER_SHEET_NAME)
else:
sheet_names = [s for s in xls.sheet_names if s != MASTER_SHEET_NAME]
category_dfs = {}
for sheet_name in sheet_names:
df = pd.read_excel(xls, sheet_name=sheet_name, engine=engine)
df.columns = [str(c).strip() for c in df.columns]
category_dfs[sheet_name] = process_category_sheet(df)
master_df = pd.read_excel(xls, sheet_name=MASTER_SHEET_NAME, engine=engine)
master_df.columns = [str(c).strip() for c in master_df.columns]
new_master, updated_dfs, summary = sync_master_and_sheets(master_df, category_dfs)
processed_sheets = {MASTER_SHEET_NAME:new_master}
processed_sheets.update({k:remove_empty_vocabulary_rows(v) for k,v in updated_dfs.items()})
ext_out = os.path.splitext(OUTPUT_FILE)[1].lower()
if ext_out in [".xlsx",".xls"]:
save_excel(processed_sheets, OUTPUT_FILE)
else:
save_ods(processed_sheets, OUTPUT_FILE)
logging.info(f"Datei gespeichert: {OUTPUT_FILE}")
logging.info("===== SYNC SUMMARY =====")
for sheet, info in summary.items():
logging.info(f"{sheet}: {info}")
if __name__ == "__main__":
main()

NV_Master_to_SPOT.py (new file, 192 lines)

@@ -0,0 +1,192 @@
import os
import json
import datetime
import pandas as pd
import ezodf
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment
import logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# ---------------- SPOT-Baumstruktur ----------------
class Node:
def __init__(self, name, node_type="category", id=None):
self.name = name
self.id = id
self.type = node_type # "category", "subcategory", "word"
self.children = []
def add_child(self, child):
self.children.append(child)
def to_dict(self):
if self.type == "word":
return self.name
return {
"id": self.id,
"name": self.name,
"type": self.type,
"children": [c.to_dict() for c in self.children]
}
@staticmethod
def from_dict(d):
if isinstance(d, str):
return Node(d, "word")
node = Node(d["name"], d.get("type", "category"), d.get("id"))
node.children = [Node.from_dict(c) for c in d.get("children", [])]
return node
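# Illustrative only (hypothetical names and IDs; never called by the script): shows how
# a SPOT subtree round-trips through to_dict()/from_dict(). Word nodes are stored as
# plain strings to keep the JSON compact.
def _demo_node_roundtrip():
    cat = Node("Flora", "category", id="3")
    cat.add_child(Node("Rose", "word"))
    d = cat.to_dict()
    # d == {"id": "3", "name": "Flora", "type": "category", "children": ["Rose"]}
    return Node.from_dict(d)  # restores an equivalent Node tree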
# ---------------- Funktionen zum Laden ----------------
def load_excel_or_ods(input_file, master_sheet="Masterstruktur"):
ext = os.path.splitext(input_file)[1].lower()
engine = "openpyxl" if ext in [".xlsx", ".xls"] else "odf"
xls = pd.ExcelFile(input_file, engine=engine)
sheet_names = [s for s in xls.sheet_names if s != master_sheet]
dfs = {s: pd.read_excel(xls, sheet_name=s, engine=engine) for s in sheet_names}
master_df = pd.read_excel(xls, sheet_name=master_sheet, engine=engine)
return master_df, dfs
# ---------------- Baum aus Sheet erstellen ----------------
def process_sheet_to_tree(df):
df = df.fillna("").astype(str)
tree_nodes = []
current_cat = None
current_sub = None
for idx, row in df.iterrows():
id_val = row.get("ID", "").strip()
uk_val = row.get("Unterkategorie", "").strip()
uuk_val = row.get("Unterunterkategorie", "").strip()
word_val = row.get("Wort/Vokabel", "").strip()
if id_val:
current_cat = Node(uk_val or word_val, "category", id=id_val)
tree_nodes.append(current_cat)
current_sub = None
elif uuk_val:
current_sub = Node(uuk_val, "subcategory")
if current_cat:
current_cat.add_child(current_sub)
elif word_val:
word_node = Node(word_val, "word")
if current_sub:
current_sub.add_child(word_node)
elif current_cat:
current_cat.add_child(word_node)
return tree_nodes
# ---------------- SPOT laden/speichern ----------------
def save_spot_json(tree_nodes, file_path):
with open(file_path, "w", encoding="utf-8") as f:
json.dump([n.to_dict() for n in tree_nodes], f, indent=2, ensure_ascii=False)
logging.info(f"SPOT gespeichert: {file_path}")
def load_spot_json(file_path):
with open(file_path, "r", encoding="utf-8") as f:
data = json.load(f)
return [Node.from_dict(n) for n in data]
# ---------------- Export in Excel ----------------
def export_spot_to_excel(tree_nodes, output_file):
wb = Workbook()
wb.remove(wb.active)
for node in tree_nodes:
ws = wb.create_sheet(title=node.name[:31])
row_idx = 1
# Kategorie
ws.cell(row=row_idx, column=1, value=node.id)
ws.cell(row=row_idx, column=2, value=node.name)
row_idx += 1
for sub in node.children:
if sub.type == "subcategory":
ws.cell(row=row_idx, column=3, value=sub.name)
row_idx += 1
for word in sub.children:
ws.cell(row=row_idx, column=4, value=word.name)
row_idx += 1
elif sub.type == "word":
ws.cell(row=row_idx, column=4, value=sub.name)
row_idx += 1
# Spaltenbreiten anpassen
for col_idx, col_letter in enumerate(["A","B","C","D"],1):
ws.column_dimensions[col_letter].width = 20
for r in range(1,row_idx):
ws.cell(r,col_idx).alignment = Alignment(horizontal='left')
wb.save(output_file)
logging.info(f"Excel exportiert: {output_file}")
# ---------------- Export in ODS ----------------
def export_spot_to_ods(tree_nodes, output_file):
doc = ezodf.newdoc(doctype="ods", filename=output_file)
for node in tree_nodes:
        # Pre-compute the required rows: words under subcategories count too, otherwise
        # row_idx can run past the allocated sheet size.
        n_rows = 2 + sum(1 + (len(sub.children) if sub.type == "subcategory" else 0) for sub in node.children)
        sheet = ezodf.Sheet(node.name[:31], size=(n_rows + 1, 4))
doc.sheets += sheet
sheet[0,0].set_value("ID")
sheet[0,1].set_value("Unterkategorie")
sheet[0,2].set_value("Unterunterkategorie")
sheet[0,3].set_value("Wort/Vokabel")
row_idx = 1
sheet[row_idx,0].set_value(node.id)
sheet[row_idx,1].set_value(node.name)
row_idx +=1
for sub in node.children:
if sub.type == "subcategory":
sheet[row_idx,2].set_value(sub.name)
row_idx +=1
for word in sub.children:
sheet[row_idx,3].set_value(word.name)
row_idx +=1
elif sub.type == "word":
sheet[row_idx,3].set_value(sub.name)
row_idx +=1
doc.save()
logging.info(f"ODS exportiert: {output_file}")
# ---------------- CLI-Funktionen zum Editieren ----------------
def add_category(tree_nodes, cat_id, cat_name):
tree_nodes.append(Node(cat_name, "category", id=cat_id))
logging.info(f"Kategorie hinzugefügt: {cat_id} {cat_name}")
def add_subcategory(tree_nodes, cat_id, sub_name):
for cat in tree_nodes:
if cat.id == cat_id:
cat.add_child(Node(sub_name, "subcategory"))
logging.info(f"Unterkategorie hinzugefügt: {sub_name} in {cat_id}")
return
def add_word(tree_nodes, cat_id, sub_name, word_name):
for cat in tree_nodes:
if cat.id == cat_id:
for sub in cat.children:
if sub.name == sub_name:
sub.add_child(Node(word_name, "word"))
logging.info(f"Wort hinzugefügt: {word_name} unter {sub_name}")
return
# ---------------- HAUPTPROGRAMM ----------------
def main():
INPUT_FILE = "NV_MASTER.ods" # Beispielpfad
OUTPUT_SPOT = "nv_spot.json"
today = datetime.datetime.today().strftime("%y.%m.%d")
OUTPUT_EXCEL = f"NV_MASTER_SPOT_{today}.xlsx"
OUTPUT_ODS = f"NV_MASTER_SPOT_{today}.ods"
master_df, dfs = load_excel_or_ods(INPUT_FILE)
spot_tree = []
for sheet, df in dfs.items():
spot_tree.extend(process_sheet_to_tree(df))
save_spot_json(spot_tree, OUTPUT_SPOT)
# Beispiel: Editieren
# add_category(spot_tree, "10.1", "Neue Kategorie")
# add_subcategory(spot_tree, "10.1", "Neue Unterunterkategorie")
# add_word(spot_tree, "10.1", "Neue Unterunterkategorie", "Neues Wort")
export_spot_to_excel(spot_tree, OUTPUT_EXCEL)
export_spot_to_ods(spot_tree, OUTPUT_ODS)
logging.info("SPOT-Workflow abgeschlossen.")
if __name__ == "__main__":
main()

NormVokabular_Mapper_1.1.py (new file, 449 lines)

@@ -0,0 +1,449 @@
"""
========================================================================
NormVokabular Mapper Übersicht
========================================================================
Dieses Skript dient dazu, Begriffe aus Datenbeständen mit einem
vordefinierten Normvokabular abzugleichen. Es identifiziert Treffer,
gibt bei fehlenden Treffern Vorschläge aus und ermöglicht optional
einen Abgleich mit externen APIs (GND, Wikidata).
Hauptfunktionen:
1. **Input verarbeiten**
- Liest CSV-, Excel- und ODS-Dateien aus dem Ordner "Input CSV".
- Extrahiert relevante Begriffe aus Spalten wie "Objektbeschreibung",
filtert Stopwords und Zahlen.
2. **Normvokabular laden**
- Liest die Masterdatei NV_MASTER.ods ein.
- Berücksichtigt Hierarchie-IDs, um übergeordnete Begriffe zuordnen zu können.
- Erstellt ein Index für gestemmte Begriffe, um auch ähnliche Schreibweisen zu erkennen.
3. **Mapping auf Normvokabular**
- Prüft, ob ein Begriff exakt oder gestemmt im Normvokabular vorkommt.
- Wenn kein Treffer vorliegt, werden alternative Vorschläge generiert.
4. **API-Abgleich (optional)**
- Fragt GND und Wikidata ab, um den Top-1 Treffer für jeden Begriff zu ermitteln.
- Nutzt einen Cache, um wiederholte Requests zu vermeiden.
- Bietet einen Dry-Run-Modus für Tests ohne Internetzugang.
5. **Ergebnis speichern**
- Speichert die Auswertung in einem eigenen Ordner "Auswertung Ergebnisse".
- Markiert Treffer visuell: grün = Treffer, rot = kein Treffer (bei Excel),
bzw. fügt Statusspalte bei ODS-Dateien hinzu.
- Enthält alle relevanten Informationen pro Begriff: Originalbegriff, Normbegriff,
Norm-ID, Vorschläge, GND/Wikidata Top1 Treffer.
6. **Logging**
- Informiert über Fortschritt, Anzahl der Begriffe, Treffer und mögliche Fehler.
**Nutzung:**
```bash
python normvokabular_mapper.py
python normvokabular_mapper.py --dry-run # nur Simulation der API-Abfragen
"""
import os
import sys
import time
import json
import re
import requests
import pandas as pd
from pathlib import Path
from difflib import SequenceMatcher
import argparse
from collections import defaultdict
# =========================
# Argumente / Dry-Run
# =========================
parser = argparse.ArgumentParser()
parser.add_argument('--dry-run', action='store_true', help='API-Abfragen simulieren')
args = parser.parse_args()
DRY_RUN = args.dry_run
# =========================
# Konfiguration
# =========================
INPUT_DIR = Path("Input CSV")
OUTPUT_DIR = Path("Auswertung Ergebnisse")
OUTPUT_DIR.mkdir(exist_ok=True)
NORMVOC_FILE = Path("Input CSV/Normvokabular_INTERN/NV_MASTER.ods")
TIMEOUT = 5
MAX_RETRIES = 3
BACKOFF_FACTOR = 2
CACHE_FILE = "api_cache.json"
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
API_ACTIVE = {"gnd": True, "wikidata": True}
FAIL_COUNTER = {"gnd":0, "wikidata":0}
HEADERS = {"User-Agent": "VLG_API_multi/1.0 (projekt@example.com)"}
CONF_THRESHOLD = 0.75 # für Vorschläge
# =========================
# Logging
# =========================
def log(level, msg):
ts = time.strftime("%Y-%m-%d %H:%M:%S")
print(f"[{ts}] [{level}] {msg}")
# =========================
# Cache laden / speichern
# =========================
if os.path.exists(CACHE_FILE):
try:
with open(CACHE_FILE,"r",encoding="utf-8") as f:
CACHE = json.load(f)
log("INFO", f"Cache geladen: {CACHE_FILE}")
except:
CACHE = {}
else:
CACHE = {}
def save_cache():
try:
with open(CACHE_FILE,"w",encoding="utf-8") as f:
json.dump(CACHE, f, indent=2, ensure_ascii=False)
log("DEBUG","Cache gespeichert")
except Exception as e:
log("ERROR", f"Cache speichern fehlgeschlagen: {e}")
# =========================
# Normalisierung / Stemming
# =========================
try:
from nltk.stem.snowball import GermanStemmer
STEMMER = GermanStemmer()
log("INFO","NLTK GermanStemmer verfügbar")
except:
STEMMER = None
log("WARNING","NLTK nicht verfügbar, naive Pluralreduktion wird genutzt")
def normalize_text(s):
if s is None:
return ""
s = str(s).lower().strip()
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
s = re.sub(r"\s+"," ",s)
return s
def naive_stem(w):
for ending in ("ern","nen","en","er","e","n","s"):
if w.endswith(ending) and len(w)-len(ending)>=3:
return w[:-len(ending)]
return w
def stem_word(word):
w = normalize_text(word)
try:
return STEMMER.stem(w) if STEMMER else naive_stem(w)
except:
return naive_stem(w)
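# Behaviour sketch for the naive fallback stemmer (illustrative words, not project data):
#   naive_stem("blumen") -> "blum"  (strips "en" while at least 3 characters remain)
#   naive_stem("rose")   -> "ros"   (strips the trailing "e")
#   naive_stem("ast")    -> "ast"   (too short, nothing is stripped)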
# =========================
# Normvokabular laden (NV_MASTER) mit Parent-ID & Stem-Index
# =========================
def load_normvokabular(file_path):
log("INFO", f"Normvokabular laden: {file_path}")
engine = "odf" if file_path.suffix.lower() == ".ods" else None
sheets = pd.read_excel(file_path, sheet_name=None, engine=engine)
norm_dict = {}
stem_index = defaultdict(list)
count = 0
for sheet_name, df in sheets.items():
df.columns = [str(c).strip() for c in df.columns]
current_parent_id = None
for _, row in df.iterrows():
# Spaltennamen flexibel anpassen
id_val = str(row.get("ID","")).strip() if "ID" in df.columns else ""
wort = str(row.get("Wort/Vokabel","")).strip() if "Wort/Vokabel" in df.columns else ""
# Zeilen mit ID, aber ohne Vokabel → Update Parent-ID
if id_val:
current_parent_id = id_val
# Skip leere Vokabeln
if not wort:
continue
assigned_id = current_parent_id # Parent-ID übernehmen
key = normalize_text(wort)
entry = {
"Name": wort,
"ID": assigned_id,
"Sheet": sheet_name
}
norm_dict[key] = entry
stem_index[stem_word(key)].append(entry)
count += 1
log("INFO", f"{count} Begriffe aus Normvokabular geladen")
return norm_dict, stem_index
# =========================
# Mapping & Vorschläge
# =========================
def map_to_norm(term, norm_dict, stem_index):
tnorm = normalize_text(term)
tstem = stem_word(tnorm)
# Exakter Treffer
if tnorm in norm_dict:
e = norm_dict[tnorm]
return e["Name"], e["ID"], []
# Gestemmter Treffer
if tstem in stem_index:
e = stem_index[tstem][0]
return e["Name"], e["ID"], []
# Kein Treffer → Vorschläge
suggestions = get_suggestions(tnorm, norm_dict)
return "KEIN TREFFER", "", suggestions
def get_suggestions(term, norm_dict, top_n=3, threshold=CONF_THRESHOLD):
t = term.lower()
scores = []
for key, val in norm_dict.items():
score = SequenceMatcher(None, t, key).ratio()
if score >= threshold:
scores.append((score, val["Name"], val["ID"]))
scores.sort(reverse=True)
return [f"{name} ({id_})" for _, name, id_ in scores[:top_n]]
# =========================
# API-Abgleich (Top1) unverändert
# =========================
def request_with_retries(api_name,url,params=None):
if DRY_RUN:
return None
cache_key = url + str(params)
if cache_key in CACHE:
return CACHE[cache_key]
retries = 0
while retries<MAX_RETRIES:
try:
r = requests.get(url,params=params,timeout=TIMEOUT,headers=HEADERS)
if r.status_code==200:
try: data=r.json()
except: data=r.text
CACHE[cache_key]=data
FAIL_COUNTER[api_name]=0
return data
except:
pass
retries+=1
time.sleep(min(BACKOFF_FACTOR**retries,30))
FAIL_COUNTER[api_name]+=1
if FAIL_COUNTER[api_name]>=10:
API_ACTIVE[api_name]=False
return None
def compute_min_conf(term,api_name):
l=len(term.strip())
if l<=3: return 0.90
if l<=6: return 0.85 if api_name=='gnd' else 0.80
return 0.75 if api_name=='gnd' else 0.70
def batch_query_gnd(terms):
results={}
if DRY_RUN or not API_ACTIVE.get("gnd",False):
for t in terms: results[t]="TEST_GND"
return results
for t in terms:
url="https://lobid.org/gnd/search"
params={"q":t,"format":"json"}
data=request_with_retries("gnd",url,params)
top=""
if data and "member" in data:
min_conf=compute_min_conf(t,'gnd')
cands=[]
for doc in data["member"]:
name=doc.get("preferredName","") or doc.get("name","")
if not name: continue
conf=SequenceMatcher(None,t.lower(),name.lower()).ratio()
if conf>=min_conf: cands.append((name,conf))
if cands:
top=sorted(cands,key=lambda x:x[1],reverse=True)[0][0]
results[t]=top
return results
def batch_query_wikidata(terms):
results={}
if DRY_RUN or not API_ACTIVE.get("wikidata",False):
for t in terms: results[t]="TEST_WD"
return results
for t in terms:
url="https://www.wikidata.org/w/api.php"
params={"action":"wbsearchentities","search":t,"language":"de","format":"json"}
data=request_with_retries("wikidata",url,params)
top=""
if data and "search" in data:
min_conf=compute_min_conf(t,'wikidata')
cands=[]
for e in data["search"]:
label=e.get("label","")
if not label: continue
conf=SequenceMatcher(None,t.lower(),label.lower()).ratio()
if conf>=min_conf: cands.append((label,conf))
if cands:
top=sorted(cands,key=lambda x:x[1],reverse=True)[0][0]
results[t]=top
return results
# =========================
# Formatabhängige Markierung / Status
# =========================
def mark_norm_hits(file_path):
ext = file_path.suffix.lower()
if ext in [".xlsx", ".xls"]:
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
wb = load_workbook(file_path)
ws = wb.active
green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
col_map = {cell.value: idx+1 for idx, cell in enumerate(ws[1])}
norm_col = col_map.get("Norm_Treffer", None)
if not norm_col:
log("WARNING","Spalte 'Norm_Treffer' nicht gefunden, keine Markierung möglich")
return
for row in ws.iter_rows(min_row=2, min_col=norm_col, max_col=norm_col):
cell = row[0]
if cell.value and cell.value!="KEIN TREFFER":
cell.fill = green_fill
else:
cell.fill = red_fill
wb.save(file_path)
log("INFO","Excel: Treffer farblich markiert (grün=Treffer, rot=kein Treffer)")
elif ext==".ods":
df = pd.read_excel(file_path, engine="odf")
df["Norm_Status"] = df["Norm_Treffer"].apply(lambda x: "Treffer" if pd.notna(x) and str(x).strip() and x!="KEIN TREFFER" else "Kein Treffer")
df.to_excel(file_path, index=False, engine="odf")
log("INFO","ODS: Spalte 'Norm_Status' eingefügt (Treffer / Kein Treffer)")
else:
log("WARNING","Unbekanntes Dateiformat, keine Markierung durchgeführt")
# =========================
# Verarbeitung Input-Dateien
# =========================
def process_files():
norm_dict, stem_index = load_normvokabular(NORMVOC_FILE)
total_terms=0
total_norm_hits=0
if not INPUT_DIR.exists():
log("CRITICAL",f"Eingabeordner {INPUT_DIR} fehlt")
sys.exit(1)
files=list(INPUT_DIR.glob("*"))
if not files:
log("WARNING","Keine Dateien gefunden")
for file_path in files:
if not file_path.suffix.lower() in [".ods",".xlsx",".csv",".xls"]:
continue
log("INFO",f"Verarbeite Datei: {file_path.name}")
# Output-Datei für diese Input-Datei erzeugen
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}{file_path.suffix}"
version = 1
while output_file.exists():
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}_({version}){file_path.suffix}"
version += 1
try:
if file_path.suffix.lower()==".csv":
df=pd.read_csv(file_path)
elif file_path.suffix.lower()==".ods":
df=pd.read_excel(file_path, engine="odf")
else:
df=pd.read_excel(file_path)
except Exception as e:
log("ERROR",f"Datei {file_path.name} konnte nicht gelesen werden: {e}")
continue
df.columns=[str(c).strip() for c in df.columns]
row_terms_map=[]
for _,row in df.iterrows():
besch=row.get("Objektbeschreibung","")
if pd.isna(besch) or not str(besch).strip(): continue
besch=str(besch).strip()
clauses=[c.strip() for c in re.split(r",",besch) if c.strip()]
terms=[]
for clause in clauses:
parts=[p.strip() for p in re.split(r"\s+",clause) if p.strip()]
for p in parts:
if p.lower() in STOPWORDS: continue
if re.fullmatch(r"\d+",p): continue
terms.append(p)
obj_box=row.get("Objekt/Ebene","")
urheber=row.get("Urheber","")
row_terms_map.append((obj_box,urheber,terms))
all_terms=[]
for _,_,terms in row_terms_map:
all_terms.extend(terms)
all_terms = list(set(all_terms)) # unique
gnd_results=batch_query_gnd(all_terms)
wd_results=batch_query_wikidata(all_terms)
output_rows=[]
for obj_box,urheber,terms in row_terms_map:
for term in terms:
norm_name,norm_id,suggestions = map_to_norm(term,norm_dict, stem_index)
total_terms+=1
if norm_name!="KEIN TREFFER":
total_norm_hits+=1
out_row={
"Box": obj_box,
"Objekt/Ebene": obj_box,
"Urheber": urheber,
"Begriff": term,
"Norm_Treffer": norm_name,
"Norm_ID": norm_id,
"Norm_Vorschlag": ", ".join(suggestions) if suggestions else "",
"GND_Top1": gnd_results.get(term,""),
"WD_Top1": wd_results.get(term,"")
}
output_rows.append(out_row)
out_df=pd.DataFrame(output_rows)
engine = "odf" if output_file.suffix.lower()==".ods" else None
out_df.to_excel(output_file,index=False,engine=engine)
log("INFO",f"Auswertung gespeichert: {output_file}")
mark_norm_hits(output_file)
save_cache()
log("INFO",f"Gesamt: {total_terms} Begriffe, {total_norm_hits} Treffer im Normvokabular")
# =========================
# Main
# =========================
if __name__=="__main__":
process_files()
log("INFO","Fertig")

NormVokabular_Mapper_1.2.py (new file, 471 lines)

@@ -0,0 +1,471 @@
"""
========================================================================
NormVokabular Mapper Übersicht
========================================================================
Dieses Skript dient dazu, Begriffe aus Datenbeständen mit einem
vordefinierten Normvokabular abzugleichen. Es identifiziert Treffer,
gibt bei fehlenden Treffern Vorschläge aus und ermöglicht optional
einen Abgleich mit externen APIs (GND, Wikidata).
Hauptfunktionen:
1. **Input verarbeiten**
- Liest CSV-, Excel- und ODS-Dateien aus dem Ordner "Input CSV".
- Extrahiert relevante Begriffe aus Spalten wie "Objektbeschreibung",
filtert Stopwords und Zahlen.
2. **Normvokabular laden**
- Liest die Masterdatei NV_MASTER.ods ein.
- Berücksichtigt Hierarchie-IDs, um übergeordnete Begriffe zuordnen zu können.
- Erstellt ein Index für gestemmte Begriffe, um auch ähnliche Schreibweisen zu erkennen.
3. **Mapping auf Normvokabular**
- Prüft, ob ein Begriff exakt oder gestemmt im Normvokabular vorkommt.
- Wenn kein Treffer vorliegt, werden alternative Vorschläge generiert.
4. **API-Abgleich (optional)**
- Fragt GND und Wikidata ab, um den Top-1 Treffer für jeden Begriff zu ermitteln.
- Nutzt einen Cache, um wiederholte Requests zu vermeiden.
- Bietet einen Dry-Run-Modus für Tests ohne Internetzugang.
5. **Ergebnis speichern**
- Speichert die Auswertung in einem eigenen Ordner "Auswertung Ergebnisse".
- Markiert Treffer visuell: grün = Treffer, rot = kein Treffer (bei Excel),
bzw. fügt Statusspalte bei ODS-Dateien hinzu.
- Enthält alle relevanten Informationen pro Begriff: Originalbegriff, Normbegriff,
Norm-ID, Vorschläge, GND/Wikidata Top1 Treffer.
6. **Logging**
- Informiert über Fortschritt, Anzahl der Begriffe, Treffer und mögliche Fehler.
"""
import os
import sys
import re
import time
import json
import pandas as pd
import requests
from pathlib import Path
from collections import defaultdict
from difflib import SequenceMatcher
# RapidFuzz für Token-basierte Fuzzy-Suche
try:
from rapidfuzz import fuzz
RAPIDFUZZ_AVAILABLE = True
print("RapidFuzz verfügbar")
except ImportError:
RAPIDFUZZ_AVAILABLE = False
print("RapidFuzz nicht verfügbar nutze SequenceMatcher")
# Spacy Lemmatizer
try:
import spacy
nlp = spacy.load("de_core_news_sm")
SPACY_AVAILABLE = True
print("Spacy Lemmatizer aktiviert")
except:
SPACY_AVAILABLE = False
nlp = None
print("Spacy nicht verfügbar nutze naive Stemmer")
# =========================
# Pfade & Config
# =========================
INPUT_DIR = Path("Input CSV")
OUTPUT_DIR = Path("Auswertung Ergebnisse")
OUTPUT_DIR.mkdir(exist_ok=True)
NORMVOC_FILE = Path("Input CSV/Normvokabular_INTERN/NV_MASTER.ods")
CACHE_FILE = "api_cache.json"
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
CONF_THRESHOLD = 0.75
TIMEOUT = 5
MAX_RETRIES = 3
BACKOFF_FACTOR = 2
HEADERS = {"User-Agent": "VLG_API_multi/1.0 (projekt@example.com)"}
API_ACTIVE = {"gnd": True, "wikidata": True}
FAIL_COUNTER = {"gnd": 0, "wikidata": 0}
# Cache
if os.path.exists(CACHE_FILE):
with open(CACHE_FILE,"r",encoding="utf-8") as f:
CACHE = json.load(f)
else:
CACHE = {}
def save_cache():
with open(CACHE_FILE,"w",encoding="utf-8") as f:
json.dump(CACHE, f, indent=2, ensure_ascii=False)
# =========================
# Normalisierung / Lemma
# =========================
def normalize_text(s):
if not s:
return ""
s = str(s).lower().strip()
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
s = re.sub(r"\s+"," ",s)
return s
lemma_cache = {}
def lemmatize_term(term):
term_norm = normalize_text(term)
if term_norm in lemma_cache:
return lemma_cache[term_norm]
if SPACY_AVAILABLE and nlp:
doc = nlp(term_norm)
lemma = " ".join([token.lemma_ for token in doc])
else:
lemma = term_norm
lemma_cache[term_norm] = lemma
return lemma
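# Note: lemmas are memoised in lemma_cache, keyed by the normalized term, so a spelling
# that repeats across many "Objektbeschreibung" cells only passes through spaCy once.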
# =========================
# Kompositum-Zerlegung (erweitert)
# =========================
def compound_split(term, norm_dict):
"""
Zerlegt Komposita durch Prüfen auf Substrings, die im Normvokabular vorkommen.
"""
term_norm = normalize_text(term)
matches = []
for i in range(len(term_norm)):
for j in range(i+3, len(term_norm)+1):
sub = term_norm[i:j]
if sub in norm_dict and sub not in matches:
matches.append(sub)
if not matches:
matches = [term_norm]
return matches
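# Example of the substring-based split (hypothetical vocabulary whose normalized keys
# include "blume" and "vase"):
#   compound_split("Blumenvase", norm_dict) -> ["blume", "vase"]
#   compound_split("Teller", norm_dict)     -> ["teller"]   # no known substring, term kept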
# =========================
# Normvokabular laden & Lemma vorbereiten
# =========================
def load_normvokabular(file_path):
sheets = pd.read_excel(file_path, sheet_name=None, engine="odf" if file_path.suffix.lower()==".ods" else None)
norm_dict = {}
stem_index = defaultdict(list)
lemma_norm_map = {}
for sheet_name, df in sheets.items():
if sheet_name.lower() in ["master", "übersicht"]:
continue
df = df.dropna(how="all", axis=1)
df.columns = [str(c).strip() for c in df.columns]
id_col = next((c for c in df.columns if "ID" in c), None)
word_col = next((c for c in df.columns if "Wort" in c or "Vokabel" in c), None)
if not id_col or not word_col:
continue
current_parent_id = None
for _, row in df.iterrows():
row_id = str(row[id_col]).strip() if pd.notna(row[id_col]) else None
row_word = str(row[word_col]).strip() if pd.notna(row[word_col]) else None
if row_id:
current_parent_id = row_id
if not row_word:
continue
assigned_parent_id = current_parent_id
entry = {
"Name": row_word,
"ID": assigned_parent_id,
"Sheet": sheet_name,
"Own_ID": row_id or ""
}
key = normalize_text(row_word)
norm_dict[key] = entry
lemma = lemmatize_term(key)
stem_index[lemma].append(entry)
if lemma not in lemma_norm_map:
lemma_norm_map[lemma] = entry
return norm_dict, stem_index, lemma_norm_map
# =========================
# Vorschläge & Fuzzy Matching
# =========================
def get_suggestions(term_lemma, lemma_norm_map, top_n=3, threshold=CONF_THRESHOLD):
candidates = []
for key_lemma, entry in lemma_norm_map.items():
if RAPIDFUZZ_AVAILABLE:
score_token = fuzz.token_set_ratio(term_lemma, key_lemma)/100
score_partial = fuzz.partial_ratio(term_lemma, key_lemma)/100
score = max(score_token, score_partial)
else:
score_seq = SequenceMatcher(None, term_lemma.lower(), key_lemma.lower()).ratio()
score = score_seq
# Substring-Boost
if term_lemma in key_lemma or key_lemma in term_lemma:
score = max(score, 0.9)
if score >= threshold:
candidates.append((score, entry["Name"], entry["ID"]))
candidates.sort(reverse=True)
return [f"{name} ({id_})" for _, name, id_ in candidates[:top_n]]
# =========================
# Mapping auf Normvokabular
# =========================
def map_to_norm(term, norm_dict, stem_index, lemma_norm_map, top_n=3):
term_norm = normalize_text(term)
term_lemma = lemmatize_term(term)
# Exakter Treffer
if term_norm in norm_dict:
e = norm_dict[term_norm]
return e["Name"], e["ID"], []
# Lemma-Treffer
if term_lemma in stem_index:
e = stem_index[term_lemma][0]
return e["Name"], e["ID"], []
# KEIN TREFFER → Kompositum-Split & Teilbegriffe prüfen
tokens = compound_split(term, norm_dict)
token_matches = []
all_suggestions = []
for t in tokens:
t_lemma = lemmatize_term(t)
if t_lemma in stem_index:
e = stem_index[t_lemma][0]
token_matches.append((t, e["Name"], e["ID"]))
else:
sugg = get_suggestions(t_lemma, lemma_norm_map, top_n)
all_suggestions.extend(sugg)
token_matches.append((t, "KEIN TREFFER", "", sugg))
combined_matches = [m[1] for m in token_matches if m[1] != "KEIN TREFFER"]
if combined_matches:
return "KEIN TREFFER", "", combined_matches
elif all_suggestions:
return "KEIN TREFFER", "", all_suggestions
else:
return "KEIN TREFFER", "", []
# =========================
# API-Abfragen
# =========================
def request_with_retries(api_name,url,params=None):
cache_key = url + str(params)
if cache_key in CACHE:
return CACHE[cache_key]
retries = 0
while retries < MAX_RETRIES:
try:
r = requests.get(url, params=params, timeout=TIMEOUT, headers=HEADERS)
if r.status_code == 200:
try: data = r.json()
except: data = r.text
CACHE[cache_key] = data
FAIL_COUNTER[api_name] = 0
return data
except:
pass
retries += 1
time.sleep(min(BACKOFF_FACTOR**retries,30))
FAIL_COUNTER[api_name] += 1
if FAIL_COUNTER[api_name] >= 10:
API_ACTIVE[api_name] = False
return None
def batch_query_gnd(terms):
results={}
if not API_ACTIVE.get("gnd", False):
for t in terms: results[t] = ""
return results
for t in terms:
url="https://lobid.org/gnd/search"
params={"q":t,"format":"json"}
data = request_with_retries("gnd", url, params)
top = ""
if data and "member" in data:
cands = [(doc.get("preferredName","") or doc.get("name",""), SequenceMatcher(None,t.lower(),(doc.get("preferredName","") or doc.get("name","")).lower()).ratio()) for doc in data["member"] if doc.get("preferredName","") or doc.get("name","")]
cands = [c for c in cands if c[1]>=0.75]
if cands:
top = sorted(cands,key=lambda x:x[1],reverse=True)[0][0]
results[t] = top
return results
def batch_query_wikidata(terms):
results={}
if not API_ACTIVE.get("wikidata", False):
for t in terms: results[t] = ""
return results
for t in terms:
url="https://www.wikidata.org/w/api.php"
params={"action":"wbsearchentities","search":t,"language":"de","format":"json"}
data = request_with_retries("wikidata", url, params)
top = ""
if data and "search" in data:
cands = [(e.get("label",""), SequenceMatcher(None,t.lower(),e.get("label","").lower()).ratio()) for e in data["search"] if e.get("label","")]
cands = [c for c in cands if c[1]>=0.70]
if cands:
top = sorted(cands,key=lambda x:x[1],reverse=True)[0][0]
results[t] = top
return results
# =========================
# Markierung / Export
# =========================
def mark_norm_hits(file_path):
ext = file_path.suffix.lower()
if ext in [".xlsx", ".xls"]:
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
wb = load_workbook(file_path)
ws = wb.active
green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
col_map = {cell.value: idx+1 for idx, cell in enumerate(ws[1])}
norm_col = col_map.get("Norm_Treffer", None)
if not norm_col:
print("Spalte 'Norm_Treffer' nicht gefunden")
return
for row in ws.iter_rows(min_row=2, min_col=norm_col, max_col=norm_col):
cell = row[0]
if cell.value and cell.value != "KEIN TREFFER":
cell.fill = green_fill
else:
cell.fill = red_fill
wb.save(file_path)
elif ext==".ods":
df = pd.read_excel(file_path, engine="odf")
df["Norm_Status"] = df["Norm_Treffer"].apply(lambda x: "Treffer" if pd.notna(x) and str(x).strip() and x!="KEIN TREFFER" else "Kein Treffer")
df.to_excel(file_path, index=False, engine="odf")
# =========================
# Export mit zweitem Sheet für Begriffe ohne Treffer und Vorschlag
# =========================
def export_results_with_no_hits(out_df, output_file):
"""
Exportiert das Mapping-Ergebnis und zusätzlich ein zweites Sheet
mit allen Begriffen, deren Norm_Treffer == 'KEIN TREFFER' und Norm_Vorschlag leer ist.
"""
# Begriffe ohne Treffer und ohne Vorschlag
no_match_df = out_df[(out_df["Norm_Treffer"]=="KEIN TREFFER") & (out_df["Norm_Vorschlag"].isna() | (out_df["Norm_Vorschlag"].str.strip()==""))].copy()
ext = output_file.suffix.lower()
if ext in [".xlsx", ".xls"]:
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
out_df.to_excel(writer, index=False, sheet_name="Mapping")
no_match_df.to_excel(writer, index=False, sheet_name="Keine Treffer")
elif ext == ".ods":
# ODS-Export via odf-Engine
with pd.ExcelWriter(output_file, engine="odf") as writer:
out_df.to_excel(writer, index=False, sheet_name="Mapping")
no_match_df.to_excel(writer, index=False, sheet_name="Keine Treffer")
# =========================
# Verarbeitung Input-Dateien
# =========================
def process_files():
norm_dict, stem_index, lemma_norm_map = load_normvokabular(NORMVOC_FILE)
total_terms = 0
total_hits = 0
if not INPUT_DIR.exists():
print(f"Eingabeordner {INPUT_DIR} fehlt")
sys.exit(1)
files = list(INPUT_DIR.glob("*"))
if not files:
print("Keine Dateien gefunden")
return
for file_path in files:
if not file_path.suffix.lower() in [".csv",".ods",".xls",".xlsx"]:
continue
print(f"Verarbeite Datei: {file_path.name}")
try:
if file_path.suffix.lower() == ".csv":
df = pd.read_csv(file_path)
else:
df = pd.read_excel(file_path, engine="odf" if file_path.suffix.lower()==".ods" else None)
except Exception as e:
print(f"Fehler beim Lesen von {file_path.name}: {e}")
continue
df = df.dropna(how="all")
df.columns = [str(c).strip() for c in df.columns]
besch_col = next((c for c in df.columns if "Objektbeschreibung" in c), None)
box_col = next((c for c in df.columns if "Objekt/Ebene" in c), None)
urh_col = next((c for c in df.columns if "Urheber" in c), None)
if not besch_col: continue
row_terms_map = []
for _, row in df.iterrows():
besch = str(row[besch_col]).strip() if pd.notna(row[besch_col]) else ""
if not besch: continue
obj_box = row[box_col] if box_col else ""
urheber = row[urh_col] if urh_col else ""
clauses = [c.strip() for c in re.split(r",", besch) if c.strip()]
terms = []
for clause in clauses:
parts = [p.strip() for p in re.split(r"\s+", clause) if p.strip()]
for p in parts:
if p.lower() in STOPWORDS: continue
if re.fullmatch(r"\d+", p): continue
terms.append(p)
row_terms_map.append((obj_box, urheber, terms))
all_terms = list({t for _,_,terms in row_terms_map for t in terms})
gnd_results = batch_query_gnd(all_terms)
wd_results = batch_query_wikidata(all_terms)
output_rows = []
for obj_box, urheber, terms in row_terms_map:
for term in terms:
norm_name, norm_id, suggestions = map_to_norm(term, norm_dict, stem_index, lemma_norm_map)
total_terms += 1
if norm_name != "KEIN TREFFER":
total_hits += 1
out_row = {
"Box": obj_box,
"Objekt/Ebene": obj_box,
"Urheber": urheber,
"Begriff": term,
"Norm_Treffer": norm_name,
"Norm_ID": norm_id,
"Norm_Vorschlag": ", ".join(suggestions) if suggestions else "",
"GND_Top1": gnd_results.get(term,""),
"WD_Top1": wd_results.get(term,"")
}
output_rows.append(out_row)
out_df = pd.DataFrame(output_rows)
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}{file_path.suffix}"
version = 1
while output_file.exists():
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}_({version}){file_path.suffix}"
version += 1
export_results_with_no_hits(out_df, output_file)
mark_norm_hits(output_file)
print(f"Auswertung gespeichert: {output_file}")
save_cache()
print(f"Gesamt: {total_terms} Begriffe, {total_hits} Treffer im Normvokabular")
# =========================
# Main
# =========================
if __name__ == "__main__":
process_files()
print("Fertig")

NormVokabular_Mapper_1.3.py (new file, 509 lines)

@@ -0,0 +1,509 @@
"""
========================================================================
NormVokabular Mapper Übersicht
========================================================================
Dieses Skript dient dazu, Begriffe aus Datenbeständen mit einem
vordefinierten Normvokabular abzugleichen. Es identifiziert Treffer,
gibt bei fehlenden Treffern Vorschläge aus und ermöglicht optional
einen Abgleich mit externen APIs (GND, Wikidata).
Hauptfunktionen:
1. **Input verarbeiten**
- Liest CSV-, Excel- und ODS-Dateien aus dem Ordner "Input CSV".
- Extrahiert relevante Begriffe aus Spalten wie "Objektbeschreibung",
filtert Stopwords und Zahlen.
2. **Normvokabular laden**
- Liest die Masterdatei NV_MASTER.ods ein.
- Berücksichtigt Hierarchie-IDs, um übergeordnete Begriffe zuordnen zu können.
- Erstellt ein Index für gestemmte Begriffe, um auch ähnliche Schreibweisen zu erkennen.
3. **Mapping auf Normvokabular**
- Prüft, ob ein Begriff exakt oder gestemmt im Normvokabular vorkommt.
- Wenn kein Treffer vorliegt, werden alternative Vorschläge generiert.
4. **API-Abgleich (optional)**
- Fragt GND und Wikidata ab, um den Top-1 Treffer für jeden Begriff zu ermitteln.
- Nutzt einen Cache, um wiederholte Requests zu vermeiden.
- Bietet einen Dry-Run-Modus für Tests ohne Internetzugang.
5. **Ergebnis speichern**
- Speichert die Auswertung in einem eigenen Ordner "Auswertung Ergebnisse".
- Markiert Treffer visuell: grün = Treffer, rot = kein Treffer (bei Excel),
bzw. fügt Statusspalte bei ODS-Dateien hinzu.
- Enthält alle relevanten Informationen pro Begriff: Originalbegriff, Normbegriff,
Norm-ID, Vorschläge, GND/Wikidata Top1 Treffer.
6. **Logging**
- Informiert über Fortschritt, Anzahl der Begriffe, Treffer und mögliche Fehler.
"""
import os
import sys
import re
import time
import json
import pandas as pd
import requests
from pathlib import Path
from collections import defaultdict
from difflib import SequenceMatcher
# RapidFuzz für Token-basierte Fuzzy-Suche
try:
from rapidfuzz import fuzz
RAPIDFUZZ_AVAILABLE = True
print("RapidFuzz verfügbar")
except ImportError:
RAPIDFUZZ_AVAILABLE = False
print("RapidFuzz nicht verfügbar nutze SequenceMatcher")
# Spacy Lemmatizer
try:
import spacy
nlp = spacy.load("de_core_news_sm")
SPACY_AVAILABLE = True
print("Spacy Lemmatizer aktiviert")
except:
SPACY_AVAILABLE = False
nlp = None
print("Spacy nicht verfügbar nutze naive Stemmer")
# =========================
# Pfade & Config
# =========================
INPUT_DIR = Path("Input CSV")
OUTPUT_DIR = Path("Auswertung Ergebnisse")
OUTPUT_DIR.mkdir(exist_ok=True)
NORMVOC_FILE = Path("Input CSV/Normvokabular_INTERN/NV_MASTER.ods")
CACHE_FILE = "api_cache.json"
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
CONF_THRESHOLD = 0.75
TIMEOUT = 5
MAX_RETRIES = 3
BACKOFF_FACTOR = 2
HEADERS = {"User-Agent": "VLG_API_multi/1.0 (projekt@example.com)"}
API_ACTIVE = {"gnd": True, "wikidata": True}
FAIL_COUNTER = {"gnd": 0, "wikidata": 0}
# Cache
if os.path.exists(CACHE_FILE):
with open(CACHE_FILE,"r",encoding="utf-8") as f:
CACHE = json.load(f)
else:
CACHE = {}
def save_cache():
with open(CACHE_FILE,"w",encoding="utf-8") as f:
json.dump(CACHE, f, indent=2, ensure_ascii=False)
# =========================
# Normalisierung / Lemma
# =========================
def normalize_text(s):
if not s:
return ""
s = str(s).lower().strip()
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
s = re.sub(r"\s+"," ",s)
return s
# Lemma-Cache
lemma_cache = {}
def lemmatize_term(term):
term_norm = normalize_text(term)
if term_norm in lemma_cache:
return lemma_cache[term_norm]
if SPACY_AVAILABLE and nlp:
doc = nlp(term_norm)
lemma = " ".join([token.lemma_ for token in doc])
else:
lemma = term_norm
lemma_cache[term_norm] = lemma
return lemma
# =========================
# Kompositum-Zerlegung (einfacher Ansatz)
# =========================
def compound_split(term):
parts = re.findall(r'[A-ZÄÖÜ][a-zäöü]+', term)
return parts if parts else [term]
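# This simpler split only looks at capitalised segments (illustrative):
#   compound_split("BlumenVase") -> ["Blumen", "Vase"]
#   compound_split("Blumenvase") -> ["Blumenvase"]  # single capital, no split
#   compound_split("vase")       -> ["vase"]        # no capital, raw term kept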
# =========================
# Normvokabular laden & Lemma vorbereiten
# =========================
def load_normvokabular(file_path):
sheets = pd.read_excel(file_path, sheet_name=None, engine="odf" if file_path.suffix.lower()==".ods" else None)
norm_dict = {}
stem_index = defaultdict(list)
lemma_norm_map = {} # für RapidFuzz preprocessed
for sheet_name, df in sheets.items():
if sheet_name.lower() in ["master", "übersicht"]:
continue
df = df.dropna(how="all", axis=1)
df.columns = [str(c).strip() for c in df.columns]
id_col = next((c for c in df.columns if "ID" in c), None)
word_col = next((c for c in df.columns if "Wort" in c or "Vokabel" in c), None)
if not id_col or not word_col:
continue
current_parent_id = None
for _, row in df.iterrows():
row_id = str(row[id_col]).strip() if pd.notna(row[id_col]) else None
row_word = str(row[word_col]).strip() if pd.notna(row[word_col]) else None
if row_id:
current_parent_id = row_id
if not row_word:
continue
assigned_parent_id = current_parent_id
entry = {
"Name": row_word,
"ID": assigned_parent_id, # Parent-ID
"Sheet": sheet_name,
"Own_ID": row_id or "" # eigene ID, falls vorhanden
}
key = normalize_text(row_word)
norm_dict[key] = entry
lemma = lemmatize_term(key)
stem_index[lemma].append(entry)
if lemma not in lemma_norm_map:
lemma_norm_map[lemma] = entry
return norm_dict, stem_index, lemma_norm_map
# =========================
# Mapping & Vorschläge
# =========================
def map_to_norm(term, norm_dict, stem_index, lemma_norm_map, top_n=3):
term_norm = normalize_text(term)
term_lemma = lemmatize_term(term)
# Exakter Treffer
if term_norm in norm_dict:
e = norm_dict[term_norm]
return e["Name"], e["ID"], []
# Lemma-Treffer
if term_lemma in stem_index:
e = stem_index[term_lemma][0]
return e["Name"], e["ID"], []
# KEIN TREFFER → Kompositum-Split
tokens = compound_split(term)
if len(tokens) == 1:
suggestions = get_suggestions(term_lemma, lemma_norm_map, top_n)
return "KEIN TREFFER", "", suggestions
else:
token_matches = []
for t in tokens:
t_lemma = lemmatize_term(t)
if t_lemma in stem_index:
e = stem_index[t_lemma][0]
token_matches.append((t, e["Name"], e["ID"]))
else:
sugg = get_suggestions(t_lemma, lemma_norm_map, top_n)
token_matches.append((t, "KEIN TREFFER", "", sugg))
combined_suggestions = [f"{m[1]} ({m[2]})" for m in token_matches if m[1] != "KEIN TREFFER"]
return "KEIN TREFFER", "", combined_suggestions
def get_suggestions(term_lemma, lemma_norm_map, top_n=3, threshold=CONF_THRESHOLD):
candidates = []
for key_lemma, entry in lemma_norm_map.items():
if RAPIDFUZZ_AVAILABLE:
score = fuzz.token_set_ratio(term_lemma, key_lemma)/100
else:
score = SequenceMatcher(None, term_lemma.lower(), key_lemma.lower()).ratio()
if key_lemma.lower().startswith(term_lemma.lower()):
score = min(score + 0.1, 1.0)
if score >= threshold:
candidates.append((score, entry["Name"], entry["ID"]))
candidates.sort(reverse=True)
return [f"{name} ({id_})" for _, name, id_ in candidates[:top_n]]
# =========================
# API-Abfragen
# =========================
def request_with_retries(api_name,url,params=None):
cache_key = url + str(params)
if cache_key in CACHE:
return CACHE[cache_key]
retries = 0
while retries < MAX_RETRIES:
try:
r = requests.get(url, params=params, timeout=TIMEOUT, headers=HEADERS)
if r.status_code == 200:
try: data = r.json()
except: data = r.text
CACHE[cache_key] = data
FAIL_COUNTER[api_name] = 0
return data
except:
pass
retries += 1
time.sleep(min(BACKOFF_FACTOR**retries,30))
FAIL_COUNTER[api_name] += 1
if FAIL_COUNTER[api_name] >= 10:
API_ACTIVE[api_name] = False
return None
def batch_query_gnd(terms):
results={}
if not API_ACTIVE.get("gnd", False):
for t in terms: results[t] = ""
return results
for t in terms:
url="https://lobid.org/gnd/search"
params={"q":t,"format":"json"}
data = request_with_retries("gnd", url, params)
top = ""
if data and "member" in data:
cands = [(doc.get("preferredName","") or doc.get("name",""), SequenceMatcher(None,t.lower(),(doc.get("preferredName","") or doc.get("name","")).lower()).ratio()) for doc in data["member"] if doc.get("preferredName","") or doc.get("name","")]
cands = [c for c in cands if c[1]>=0.75]
if cands:
top = sorted(cands,key=lambda x:x[1],reverse=True)[0][0]
results[t] = top
return results
def batch_query_wikidata(terms):
results={}
if not API_ACTIVE.get("wikidata", False):
for t in terms: results[t] = ""
return results
for t in terms:
url="https://www.wikidata.org/w/api.php"
params={"action":"wbsearchentities","search":t,"language":"de","format":"json"}
data = request_with_retries("wikidata", url, params)
top = ""
if data and "search" in data:
cands = [(e.get("label",""), SequenceMatcher(None,t.lower(),e.get("label","").lower()).ratio()) for e in data["search"] if e.get("label","")]
cands = [c for c in cands if c[1]>=0.70]
if cands:
top = sorted(cands,key=lambda x:x[1],reverse=True)[0][0]
results[t] = top
return results
# =========================
# Markierung / Export
# =========================
def mark_norm_hits(file_path):
ext = file_path.suffix.lower()
if ext in [".xlsx", ".xls"]:
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
wb = load_workbook(file_path)
ws = wb.active
green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
col_map = {cell.value: idx+1 for idx, cell in enumerate(ws[1])}
norm_col = col_map.get("Norm_Treffer", None)
if not norm_col:
print("Spalte 'Norm_Treffer' nicht gefunden")
return
for row in ws.iter_rows(min_row=2, min_col=norm_col, max_col=norm_col):
cell = row[0]
if cell.value and cell.value != "KEIN TREFFER":
cell.fill = green_fill
else:
cell.fill = red_fill
wb.save(file_path)
elif ext==".ods":
df = pd.read_excel(file_path, engine="odf")
df["Norm_Status"] = df["Norm_Treffer"].apply(lambda x: "Treffer" if pd.notna(x) and str(x).strip() and x!="KEIN TREFFER" else "Kein Treffer")
df.to_excel(file_path, index=False, engine="odf")
# =========================
# Neue Funktion: fehlende Begriffe in separate Datei exportieren
# =========================
def export_missing_terms(out_df, output_file):
# Filter: KEIN TREFFER & keine Vorschläge
missing_df = out_df[
(out_df["Norm_Treffer"] == "KEIN TREFFER") &
(out_df["Norm_Vorschlag"].isna() | (out_df["Norm_Vorschlag"].str.strip() == ""))
][["Begriff"]].drop_duplicates()
count_missing = len(missing_df)
print(f"Anzahl Begriffe ohne Treffer und Vorschläge: {count_missing}")
if count_missing == 0:
return
# Neue Datei erzeugen
ext = output_file.suffix.lower()
base_name = output_file.stem
missing_file = OUTPUT_DIR / f"{base_name}_fehlende_Begriffe{output_file.suffix}"
# Bei vorhandener Datei: Versionsnummer anhängen
version = 1
while missing_file.exists():
missing_file = OUTPUT_DIR / f"{base_name}_fehlende_Begriffe_({version}){output_file.suffix}"
version += 1
if ext in [".xlsx", ".xls"]:
missing_df.to_excel(missing_file, index=False, engine="openpyxl")
elif ext == ".ods":
missing_df.to_excel(missing_file, index=False, engine="odf")
else:
# Für CSV
missing_df.to_csv(missing_file, index=False, sep=";")
print(f"Fehlende Begriffe gespeichert: {missing_file}")
# =========================
# Verarbeitung Input-Dateien (final)
# =========================
def process_files():
norm_dict, stem_index, lemma_norm_map = load_normvokabular(NORMVOC_FILE)
total_terms = 0
total_hits = 0
if not INPUT_DIR.exists():
print(f"Eingabeordner {INPUT_DIR} fehlt")
sys.exit(1)
files = list(INPUT_DIR.glob("*"))
if not files:
print("Keine Dateien gefunden")
return
for file_path in files:
if not file_path.suffix.lower() in [".csv",".ods",".xls",".xlsx"]:
continue
print(f"Verarbeite Datei: {file_path.name}")
try:
if file_path.suffix.lower() == ".csv":
df = pd.read_csv(file_path)
else:
df = pd.read_excel(file_path, engine="odf" if file_path.suffix.lower()==".ods" else None)
except Exception as e:
print(f"Fehler beim Lesen von {file_path.name}: {e}")
continue
df = df.dropna(how="all")
df.columns = [str(c).strip() for c in df.columns]
besch_col = next((c for c in df.columns if "Objektbeschreibung" in c), None)
box_col = next((c for c in df.columns if "Objekt/Ebene" in c), None)
urh_col = next((c for c in df.columns if "Urheber" in c), None)
if not besch_col: continue
row_terms_map = []
for _, row in df.iterrows():
besch = str(row[besch_col]).strip() if pd.notna(row[besch_col]) else ""
if not besch: continue
obj_box = row[box_col] if box_col else ""
urheber = row[urh_col] if urh_col else ""
clauses = [c.strip() for c in re.split(r",", besch) if c.strip()]
terms = []
for clause in clauses:
parts = [p.strip() for p in re.split(r"\s+", clause) if p.strip()]
for p in parts:
if p.lower() in STOPWORDS: continue
if re.fullmatch(r"\d+", p): continue
terms.append(p)
row_terms_map.append((obj_box, urheber, terms))
all_terms = list({t for _,_,terms in row_terms_map for t in terms})
gnd_results = batch_query_gnd(all_terms)
wd_results = batch_query_wikidata(all_terms)
output_rows = []
for obj_box, urheber, terms in row_terms_map:
for term in terms:
norm_name, norm_id, suggestions = map_to_norm(term, norm_dict, stem_index, lemma_norm_map)
total_terms += 1
if norm_name != "KEIN TREFFER":
total_hits += 1
out_row = {
"Box": obj_box,
"Objekt/Ebene": obj_box,
"Urheber": urheber,
"Begriff": term,
"Norm_Treffer": norm_name,
"Norm_ID": norm_id,
"Norm_Vorschlag": ", ".join(suggestions) if suggestions else "",
"GND_Top1": gnd_results.get(term,""),
"WD_Top1": wd_results.get(term,"")
}
output_rows.append(out_row)
out_df = pd.DataFrame(output_rows)
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}{file_path.suffix}"
version = 1
while output_file.exists():
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}_({version}){file_path.suffix}"
version += 1
engine = "odf" if output_file.suffix.lower()==".ods" else None
out_df.to_excel(output_file, index=False, engine=engine)
# --- NEU: fehlende Begriffe in separate Datei ---
export_missing_terms(out_df, output_file)
mark_norm_hits(output_file)
print(f"Auswertung gespeichert: {output_file}")
save_cache()
print(f"Gesamt: {total_terms} Begriffe, {total_hits} Treffer im Normvokabular")
# =========================
# Main
# =========================
if __name__ == "__main__":
process_files()
print("Fertig")

747
NormVokabular_Mapper_1.4.py Normal file
View File

@ -0,0 +1,747 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
NormVokabular Mapper Version 1.4.1
- Detailliertes (DEBUG) Batch-Logging: gepufferte Logs werden periodisch in Konsole + Datei geschrieben
- Getty AAT (SPARQL via requests) API-polite, timeout/retries/backoff
- Fehlertoleranz: API-Ausfälle führen nicht zum Totalabsturz
- Fehlende Begriffe -> separate Datei (gleiches Format wie Output)
- Bestehende Normalisierung/Lemmatisierung/Stemming wird weiterverwendet
- Batch-Logging-Modus (konfigurierbar)
"""
from __future__ import annotations
import os
import sys
import re
import time
import json
import threading
import queue
import requests
import pandas as pd
from pathlib import Path
from collections import defaultdict
from difflib import SequenceMatcher
from datetime import datetime
# Optional libs
try:
from rapidfuzz import fuzz
RAPIDFUZZ_AVAILABLE = True
except Exception:
RAPIDFUZZ_AVAILABLE = False
try:
import spacy
nlp = spacy.load("de_core_news_sm")
SPACY_AVAILABLE = True
except Exception:
SPACY_AVAILABLE = False
nlp = None
# =========================
# Config & Pfade
# =========================
INPUT_DIR = Path("Input CSV")
OUTPUT_DIR = Path("Auswertung Ergebnisse")
OUTPUT_DIR.mkdir(exist_ok=True)
NORMVOC_FILE = Path("Input CSV/Normvokabular_INTERN/NV_MASTER.ods")
CACHE_FILE = "api_cache.json"
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
CONF_THRESHOLD = 0.75
TIMEOUT_DEFAULT = 5
MAX_RETRIES_DEFAULT = 3
BACKOFF_FACTOR_DEFAULT = 2
HEADERS = {"User-Agent": "VLG_API_multi/1.0 (projekt@example.com)"}
API_ACTIVE = {"gnd": True, "wikidata": True, "aat": True}
FAIL_COUNTER = {"gnd": 0, "wikidata": 0, "aat": 0}
# Logging file
LOG_FILE = OUTPUT_DIR / "mapper_log.txt"
# Batch logging parameters
LOG_BATCH_SIZE = 100 # flush wenn >= Einträge
LOG_FLUSH_INTERVAL = 5.0 # Sekunden zwischen Flushes (Batch-Logging)
LOG_LEVEL = "DEBUG" # ausführlich gewünscht
# =========================
# Buffered/Batched Logger
# =========================
class BatchLogger:
def __init__(self, logfile: Path, flush_interval: float = 5.0, batch_size: int = 100, level: str = "DEBUG"):
self.logfile = logfile
self.flush_interval = flush_interval
self.batch_size = batch_size
self.level = level
self.q = queue.Queue()
self._stop_event = threading.Event()
self._thread = threading.Thread(target=self._worker, daemon=True, name="BatchLoggerThread")
# Ensure logfile exists
try:
logfile.parent.mkdir(parents=True, exist_ok=True)
logfile.touch(exist_ok=True)
except Exception:
pass
self._thread.start()
def _format(self, level: str, msg: str) -> str:
ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
return f"{ts} - {level} - {msg}"
def log(self, level: str, msg: str):
if self._stop_event.is_set():
return
formatted = self._format(level, msg)
self.q.put((level, formatted))
# If queue too big, trigger immediate flush by putting a special token
if self.q.qsize() >= self.batch_size:
self.q.put(("__FLUSH__", "__FLUSH__"))
def debug(self, msg: str):
if LOG_LEVEL in ("DEBUG",):
self.log("DEBUG", msg)
def info(self, msg: str):
self.log("INFO", msg)
def warning(self, msg: str):
self.log("WARNING", msg)
def error(self, msg: str):
self.log("ERROR", msg)
def exception(self, msg: str):
self.log("EXCEPTION", msg)
def _worker(self):
buffer = []
last_flush = time.time()
while not self._stop_event.is_set() or not self.q.empty():
try:
item = None
try:
item = self.q.get(timeout=self.flush_interval)
except queue.Empty:
# time-based flush
if buffer:
self._flush_buffer(buffer)
buffer = []
last_flush = time.time()
continue
if item is None:
continue
level, formatted = item
if level == "__FLUSH__":
if buffer:
self._flush_buffer(buffer)
buffer = []
last_flush = time.time()
continue
buffer.append((level, formatted))
# flush conditions
if len(buffer) >= self.batch_size or (time.time() - last_flush) >= self.flush_interval:
self._flush_buffer(buffer)
buffer = []
last_flush = time.time()
except Exception as e:
# As a last resort, write error immediately to stderr
try:
sys.stderr.write(f"BatchLogger worker error: {e}\n")
except Exception:
pass
time.sleep(0.5)
# final flush
if buffer:
self._flush_buffer(buffer)
def _flush_buffer(self, buffer):
if not buffer:
return
# write to console and file
try:
# console
out_lines = [f"{line}\n" for _, line in buffer]
# write to stdout
try:
sys.stdout.writelines(out_lines)
sys.stdout.flush()
except Exception:
pass
# append to file
try:
with open(self.logfile, "a", encoding="utf-8") as f:
f.writelines(out_lines)
except Exception as e:
try:
sys.stderr.write(f"BatchLogger file write error: {e}\n")
except Exception:
pass
except Exception:
pass
def stop(self):
self._stop_event.set()
# put sentinel to wake worker
try:
self.q.put(("__FLUSH__", "__FLUSH__"))
except Exception:
pass
self._thread.join(timeout=5.0)
# Instantiate logger
logger = BatchLogger(LOG_FILE, flush_interval=LOG_FLUSH_INTERVAL, batch_size=LOG_BATCH_SIZE, level=LOG_LEVEL)
logger.info("Starte NormVokabular Mapper v1.4.1 (Batch-Logging aktiv)")
# =========================
# Cache laden/speichern
# =========================
if os.path.exists(CACHE_FILE):
try:
with open(CACHE_FILE,"r",encoding="utf-8") as f:
CACHE = json.load(f)
logger.debug(f"Cache geladen ({len(CACHE)} Einträge).")
except Exception as e:
logger.warning(f"Cache konnte nicht geladen werden: {e}")
CACHE = {}
else:
CACHE = {}
def save_cache():
try:
with open(CACHE_FILE,"w",encoding="utf-8") as f:
json.dump(CACHE, f, indent=2, ensure_ascii=False)
logger.debug("Cache gespeichert.")
except Exception as e:
logger.error(f"Cache konnte nicht gespeichert werden: {e}")
# =========================
# Normalisierung / Lemma / Tokenization
# =========================
def normalize_text(s):
if not s:
return ""
s = str(s).lower().strip()
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
s = re.sub(r"\s+"," ",s)
return s
lemma_cache = {}
def lemmatize_term(term):
term_norm = normalize_text(term)
if term_norm in lemma_cache:
return lemma_cache[term_norm]
if SPACY_AVAILABLE and nlp:
try:
doc = nlp(term_norm)
lemma = " ".join([token.lemma_ for token in doc])
except Exception:
lemma = term_norm
else:
lemma = term_norm
lemma_cache[term_norm] = lemma
return lemma
def compound_split(term):
if not term:
return []
parts = [p for p in re.split(r"[\s\-_/]+", term) if p]
return parts if parts else [term]
# =========================
# Normvokabular laden & Index
# =========================
def load_normvokabular(file_path):
try:
sheets = pd.read_excel(file_path, sheet_name=None, engine="odf" if file_path.suffix.lower()==".ods" else None)
except Exception as e:
logger.error(f"Normvokabular konnte nicht geladen werden: {e}")
raise
norm_dict = {}
stem_index = defaultdict(list)
lemma_norm_map = {}
for sheet_name, df in sheets.items():
if sheet_name.lower() in ["master", "übersicht"]:
continue
df = df.dropna(how="all", axis=1)
df.columns = [str(c).strip() for c in df.columns]
id_col = next((c for c in df.columns if "ID" in c), None)
word_col = next((c for c in df.columns if "Wort" in c or "Vokabel" in c or "Begriff" in c), None)
if not id_col or not word_col:
continue
current_parent_id = None
for _, row in df.iterrows():
row_id = str(row[id_col]).strip() if pd.notna(row[id_col]) else None
row_word = str(row[word_col]).strip() if pd.notna(row[word_col]) else None
if row_id:
current_parent_id = row_id
if not row_word:
continue
assigned_parent_id = current_parent_id
entry = {"Name": row_word, "ID": assigned_parent_id or "", "Sheet": sheet_name, "Own_ID": row_id or ""}
key = normalize_text(row_word)
norm_dict[key] = entry
lemma = lemmatize_term(key)
stem_index[lemma].append(entry)
if lemma not in lemma_norm_map:
lemma_norm_map[lemma] = entry
logger.info(f"Normvokabular geladen: {len(norm_dict)} Einträge, {len(stem_index)} Stems")
return norm_dict, stem_index, lemma_norm_map
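# Illustration of the parent-ID inheritance above (invented sample rows, assumption):
# a sheet fragment such as
#     ID   | Wort/Vokabel
#     2.3  | Wappen
#          | Wappenschild
# produces {"wappen": {"Name": "Wappen", "ID": "2.3", ...},
#           "wappenschild": {"Name": "Wappenschild", "ID": "2.3", ...}},
# i.e. rows without their own ID are attached to the most recent parent ID.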
# =========================
# Mapping & Vorschläge
# =========================
def map_to_norm(term, norm_dict, stem_index, lemma_norm_map, top_n=3):
term_norm = normalize_text(term)
term_lemma = lemmatize_term(term)
if term_norm in norm_dict:
e = norm_dict[term_norm]
logger.debug(f"map_to_norm: exakter Treffer für '{term}' -> {e['Name']}")
return e["Name"], e["ID"], []
if term_lemma in stem_index:
e = stem_index[term_lemma][0]
logger.debug(f"map_to_norm: Lemma-Treffer für '{term}' -> {e['Name']}")
return e["Name"], e["ID"], []
tokens = compound_split(term_norm)
if len(tokens) == 1:
suggestions = get_suggestions(term_lemma, lemma_norm_map, top_n)
logger.debug(f"map_to_norm: KEIN TREFFER für '{term}', Vorschläge: {suggestions}")
return "KEIN TREFFER", "", suggestions
else:
token_matches = []
for t in tokens:
t_lemma = lemmatize_term(t)
if t_lemma in stem_index:
e = stem_index[t_lemma][0]
token_matches.append((t, e["Name"], e["ID"]))
else:
sugg = get_suggestions(t_lemma, lemma_norm_map, top_n)
token_matches.append((t, "KEIN TREFFER", "", sugg))
combined_suggestions = [f"{m[1]} ({m[2]})" for m in token_matches if m[1] != "KEIN TREFFER"]
logger.debug(f"map_to_norm: Kompositum '{term}' -> combined_suggestions: {combined_suggestions}")
return "KEIN TREFFER", "", combined_suggestions
def get_suggestions(term_lemma, lemma_norm_map, top_n=3, threshold=CONF_THRESHOLD):
candidates = []
for key_lemma, entry in lemma_norm_map.items():
if RAPIDFUZZ_AVAILABLE:
score = fuzz.token_set_ratio(term_lemma, key_lemma)/100
else:
score = SequenceMatcher(None, term_lemma.lower(), key_lemma.lower()).ratio()
if key_lemma.lower().startswith(term_lemma.lower()):
score = min(score + 0.1, 1.0)
if score >= threshold:
candidates.append((score, entry["Name"], entry["ID"]))
candidates.sort(reverse=True)
return [f"{name} ({id_})" for _, name, id_ in candidates[:top_n]]
# =========================
# Generic request with retries & caching
# =========================
def request_with_retries_generic(api_name, url, params=None, headers=None, timeout=TIMEOUT_DEFAULT, max_retries=MAX_RETRIES_DEFAULT, backoff=BACKOFF_FACTOR_DEFAULT):
cache_key = url + (json.dumps(params, sort_keys=True, ensure_ascii=False) if params else "")
if cache_key in CACHE:
logger.debug(f"[Cache] {api_name}: {cache_key}")
return CACHE[cache_key]
retries = 0
while retries < max_retries:
try:
r = requests.get(url, params=params, headers=headers or HEADERS, timeout=timeout)
if r.status_code == 200:
try:
data = r.json()
except Exception:
data = r.text
CACHE[cache_key] = data
FAIL_COUNTER[api_name] = 0
logger.debug(f"[{api_name}] Erfolgreiche Antwort für {url}")
return data
else:
logger.warning(f"[{api_name}] HTTP {r.status_code} für {url}")
raise ValueError(f"HTTP {r.status_code}")
except Exception as e:
retries += 1
wait = backoff ** retries
logger.warning(f"[{api_name}] Fehler ({retries}/{max_retries}) für {url}: {e}. Warte {wait}s")
time.sleep(wait)
FAIL_COUNTER[api_name] += 1
if FAIL_COUNTER[api_name] >= 10:
API_ACTIVE[api_name] = False
logger.error(f"[{api_name}] Deaktiviere API nach zu vielen Fehlern.")
return None
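# Behaviour sketch (assumption: the default constants above): identical URL+params
# combinations are answered from CACHE without a network call; failed attempts back
# off exponentially (2s, 4s, 8s with BACKOFF_FACTOR_DEFAULT=2), and after ten
# consecutive exhausted calls the API is disabled via API_ACTIVE[api_name] = False.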
# =========================
# GND / Wikidata (bestehend)
# =========================
def batch_query_gnd(terms):
results = {}
if not API_ACTIVE.get("gnd", False):
for t in terms: results[t] = ""
return results
logger.info(f"[GND] Starte GND-Abfragen für {len(terms)} Terme")
start = time.time()
for idx, t in enumerate(terms, start=1):
logger.debug(f"[GND] ({idx}/{len(terms)}) Anfrage für '{t}'")
url = "https://lobid.org/gnd/search"
params = {"q": t, "format": "json"}
data = request_with_retries_generic("gnd", url, params=params, headers=HEADERS, timeout=TIMEOUT_DEFAULT)
top = ""
try:
if data and "member" in data:
cands = [(doc.get("preferredName","") or doc.get("name",""),
SequenceMatcher(None, t.lower(), (doc.get("preferredName","") or doc.get("name","")).lower()).ratio())
for doc in data["member"] if doc.get("preferredName","") or doc.get("name","")]
cands = [c for c in cands if c[1] >= 0.75]
if cands:
top = sorted(cands, key=lambda x: x[1], reverse=True)[0][0]
except Exception as e:
logger.debug(f"[GND] Fehler bei Verarbeitung für '{t}': {e}")
results[t] = top
elapsed = time.time() - start
logger.info(f"[GND] Fertig. Dauer: {elapsed:.1f}s")
return results
def batch_query_wikidata(terms):
results = {}
if not API_ACTIVE.get("wikidata", False):
for t in terms: results[t] = ""
return results
logger.info(f"[WD] Starte Wikidata-Abfragen für {len(terms)} Terme")
start = time.time()
for idx, t in enumerate(terms, start=1):
logger.debug(f"[WD] ({idx}/{len(terms)}) Anfrage für '{t}'")
url = "https://www.wikidata.org/w/api.php"
params = {"action": "wbsearchentities", "search": t, "language": "de", "format": "json"}
data = request_with_retries_generic("wikidata", url, params=params, headers=HEADERS, timeout=TIMEOUT_DEFAULT)
top = ""
try:
if data and "search" in data:
cands = [(e.get("label",""), SequenceMatcher(None, t.lower(), e.get("label","").lower()).ratio())
for e in data["search"] if e.get("label","")]
cands = [c for c in cands if c[1] >= 0.70]
if cands:
top = sorted(cands, key=lambda x: x[1], reverse=True)[0][0]
except Exception as e:
logger.debug(f"[WD] Fehler bei Verarbeitung für '{t}': {e}")
results[t] = top
elapsed = time.time() - start
logger.info(f"[WD] Fertig. Dauer: {elapsed:.1f}s")
return results
# =========================
# Getty AAT Abfrage robust & API-polite (requests)
# =========================
def batch_query_getty_aat(terms):
results = {}
if not API_ACTIVE.get("aat", False):
for t in terms: results[t] = ""
return results
endpoint = "https://vocab.getty.edu/sparql"
headers = {"Accept": "application/sparql-results+json", "User-Agent": HEADERS.get("User-Agent")}
TIMEOUT = 8
MAX_RETRIES = 3
BACKOFF_FACTOR = 2
FAIL_LIMIT = 5
fail_counter_local = 0
logger.info(f"[AAT] Starte Getty AAT-Abgleich für {len(terms)} Terme")
start_all = time.time()
for idx, term in enumerate(terms, start=1):
term_norm = lemmatize_term(normalize_text(term))
tokens = compound_split(term_norm)
logger.debug(f"[AAT] ({idx}/{len(terms)}) Begriff '{term}' -> Tokens: {tokens}")
query_fragments = []
for tkn in tokens:
t_escaped = tkn.replace('"', '\\"')
qf = f"""
?concept skos:prefLabel ?label .
FILTER(lang(?label)='de' && CONTAINS(LCASE(?label), LCASE("{t_escaped}")))
"""
query_fragments.append(f"{{ {qf} }}")
query_body = " UNION ".join(query_fragments) if query_fragments else ""
query = f"PREFIX skos: <http://www.w3.org/2004/02/skos/core#> SELECT ?label ?concept WHERE {{ {query_body} }} LIMIT 10"
retries = 0
success = False
start_term = time.time()
while retries < MAX_RETRIES and not success:
try:
logger.debug(f"[AAT] Anfrage (Retry {retries}) für '{term}'")
r = requests.get(endpoint, params={"query": query}, headers=headers, timeout=TIMEOUT)
if r.status_code != 200:
raise ValueError(f"HTTP {r.status_code}")
ret = r.json()
candidates = [(b['label']['value'], b['concept']['value']) for b in ret.get("results", {}).get("bindings", [])]
if candidates:
scored = [
(c[0], c[1], SequenceMatcher(None, term_norm, lemmatize_term(normalize_text(c[0]))).ratio())
for c in candidates
]
top = max(scored, key=lambda x: x[2])
results[term] = top[0]
logger.debug(f"[AAT] Treffer für '{term}': {results[term]} (Score: {top[2]:.3f})")
else:
results[term] = ""
logger.debug(f"[AAT] Kein Treffer für '{term}'")
success = True
except Exception as e:
retries += 1
wait = BACKOFF_FACTOR ** retries
logger.warning(f"[AAT] Fehler ({retries}/{MAX_RETRIES}) für '{term}': {e} warte {wait}s")
time.sleep(wait)
if retries == MAX_RETRIES:
results[term] = ""
fail_counter_local += 1
# polite delay
time.sleep(1.0)
elapsed_term = time.time() - start_term
logger.debug(f"[AAT] Dauer für '{term}': {elapsed_term:.2f}s")
if fail_counter_local >= FAIL_LIMIT:
logger.error("[AAT] Zu viele Fehler lokal - breche AAT-Abfragen ab.")
for t_rem in terms[idx:]:
results[t_rem] = ""
FAIL_COUNTER["aat"] += fail_counter_local
API_ACTIVE["aat"] = False
break
elapsed_all = time.time() - start_all
logger.info(f"[AAT] Getty AAT-Abgleich abgeschlossen. Dauer: {elapsed_all:.1f}s")
return results
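# Example of the SPARQL string assembled above for the single token "wappen"
# (whitespace condensed; the real query keeps the f-string line breaks):
#   PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
#   SELECT ?label ?concept WHERE {
#     { ?concept skos:prefLabel ?label .
#       FILTER(lang(?label)='de' && CONTAINS(LCASE(?label), LCASE("wappen"))) }
#   } LIMIT 10
# Multi-token terms contribute one such block per token, joined with UNION.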
# =========================
# Markierung / Export (Excel/ODS)
# =========================
def mark_norm_hits(file_path):
ext = file_path.suffix.lower()
try:
if ext in [".xlsx", ".xls"]:
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
wb = load_workbook(file_path)
ws = wb.active
col_map = {cell.value: idx+1 for idx, cell in enumerate(ws[1])}
norm_col = col_map.get("Norm_Treffer", None)
if not norm_col:
logger.debug("Spalte 'Norm_Treffer' nicht gefunden (mark_norm_hits).")
wb.save(file_path)
return
green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
for row in ws.iter_rows(min_row=2, min_col=norm_col, max_col=norm_col):
cell = row[0]
if cell.value and cell.value != "KEIN TREFFER":
cell.fill = green_fill
else:
cell.fill = red_fill
wb.save(file_path)
elif ext==".ods":
df = pd.read_excel(file_path, engine="odf")
df["Norm_Status"] = df["Norm_Treffer"].apply(lambda x: "Treffer" if pd.notna(x) and str(x).strip() and x!="KEIN TREFFER" else "Kein Treffer")
df.to_excel(file_path, index=False, engine="odf")
except Exception as e:
logger.warning(f"Fehler beim Markieren der Treffer in {file_path}: {e}")
# =========================
# Fehlende Begriffe -> separate Datei
# =========================
def export_missing_terms(out_df, output_file):
missing_df = out_df[
(out_df["Norm_Treffer"] == "KEIN TREFFER") &
(out_df["Norm_Vorschlag"].isna() | (out_df["Norm_Vorschlag"].str.strip() == ""))
][["Begriff"]].drop_duplicates()
count_missing = len(missing_df)
logger.info(f"Anzahl Begriffe ohne Treffer und Vorschläge: {count_missing}")
if count_missing == 0:
return
ext = output_file.suffix.lower()
base_name = output_file.stem
missing_file = OUTPUT_DIR / f"{base_name}_fehlende_Begriffe{output_file.suffix}"
version = 1
while missing_file.exists():
missing_file = OUTPUT_DIR / f"{base_name}_fehlende_Begriffe_({version}){output_file.suffix}"
version += 1
try:
if ext in [".xlsx", ".xls"]:
missing_df.to_excel(missing_file, index=False, engine="openpyxl")
elif ext == ".ods":
missing_df.to_excel(missing_file, index=False, engine="odf")
else:
missing_df.to_csv(missing_file, index=False, sep=";")
logger.info(f"Fehlende Begriffe gespeichert: {missing_file}")
except Exception as e:
logger.error(f"Fehler beim Speichern der fehlenden Begriffe: {e}")
# =========================
# Haupt-Loop: Verarbeitung Input-Dateien
# =========================
def process_files():
overall_start = time.time()
try:
norm_dict, stem_index, lemma_norm_map = load_normvokabular(NORMVOC_FILE)
except Exception as e:
logger.error("Normvokabular konnte nicht geladen werden. Beende.")
raise
total_terms = 0
total_hits = 0
if not INPUT_DIR.exists():
logger.error(f"Eingabeordner {INPUT_DIR} fehlt")
raise SystemExit(1)
files = list(INPUT_DIR.glob("*"))
if not files:
logger.info("Keine Dateien gefunden")
return
logger.info(f"Starte Verarbeitung von {len(files)} Dateien")
for file_idx, file_path in enumerate(files, start=1):
if not file_path.suffix.lower() in [".csv",".ods",".xls",".xlsx"]:
logger.debug(f"Übersprungen (kein unterstütztes Format): {file_path.name}")
continue
logger.info(f"[Datei {file_idx}/{len(files)}] Verarbeite: {file_path.name}")
file_start = time.time()
try:
if file_path.suffix.lower() == ".csv":
df = pd.read_csv(file_path)
else:
df = pd.read_excel(file_path, engine="odf" if file_path.suffix.lower()==".ods" else None)
except Exception as e:
logger.error(f"Fehler beim Lesen von {file_path.name}: {e}")
continue
df = df.dropna(how="all")
df.columns = [str(c).strip() for c in df.columns]
besch_col = next((c for c in df.columns if "Objektbeschreibung" in c), None)
box_col = next((c for c in df.columns if "Objekt/Ebene" in c), None)
urh_col = next((c for c in df.columns if "Urheber" in c), None)
if not besch_col:
logger.warning(f"Spalte 'Objektbeschreibung' nicht gefunden in {file_path.name}. Datei übersprungen.")
continue
row_terms_map = []
for r_idx, row in enumerate(df.itertuples(index=False), start=1):
try:
besch = str(row[df.columns.get_loc(besch_col)]).strip() if pd.notna(row[df.columns.get_loc(besch_col)]) else ""
except Exception:
besch = str(row[besch_col]).strip() if pd.notna(row[besch_col]) else ""
if not besch:
continue
obj_box = row[df.columns.get_loc(box_col)] if box_col and box_col in df.columns else ""
urheber = row[df.columns.get_loc(urh_col)] if urh_col and urh_col in df.columns else ""
clauses = [c.strip() for c in re.split(r",", besch) if c.strip()]
terms = []
for clause in clauses:
parts = [p.strip() for p in re.split(r"\s+", clause) if p.strip()]
for p in parts:
if p.lower() in STOPWORDS:
continue
if re.fullmatch(r"\d+", p):
continue
terms.append(p)
row_terms_map.append((obj_box, urheber, terms))
if (r_idx % 200) == 0:
logger.debug(f"[{file_path.name}] Zeile {r_idx} verarbeitet")
all_terms = list({t for _,_,terms in row_terms_map for t in terms})
logger.info(f"[{file_path.name}] Gefundene unique Terme: {len(all_terms)}")
total_unique_terms = len(all_terms)
# API-Abfragen
t0 = time.time()
gnd_results = batch_query_gnd(all_terms)
t1 = time.time()
logger.info(f"[{file_path.name}] GND-Abfragen Dauer: {t1-t0:.1f}s")
wd_results = batch_query_wikidata(all_terms)
t2 = time.time()
logger.info(f"[{file_path.name}] Wikidata-Abfragen Dauer: {t2-t1:.1f}s")
aat_results = batch_query_getty_aat(all_terms) if API_ACTIVE.get("aat", False) else {t:"" for t in all_terms}
t3 = time.time()
logger.info(f"[{file_path.name}] AAT-Abfragen Dauer: {t3-t2:.1f}s")
# Build output rows
output_rows = []
processed_count = 0
for obj_box, urheber, terms in row_terms_map:
for term in terms:
norm_name, norm_id, suggestions = map_to_norm(term, norm_dict, stem_index, lemma_norm_map)
total_terms += 1
if norm_name != "KEIN TREFFER":
total_hits += 1
out_row = {
"Box": obj_box,
"Objekt/Ebene": obj_box,
"Urheber": urheber,
"Begriff": term,
"Norm_Treffer": norm_name,
"Norm_ID": norm_id,
"Norm_Vorschlag": ", ".join(suggestions) if suggestions else "",
"GND_Top1": gnd_results.get(term,""),
"WD_Top1": wd_results.get(term,""),
"AAT_Top1": aat_results.get(term,"")
}
output_rows.append(out_row)
processed_count += 1
if (processed_count % 200) == 0:
logger.debug(f"[{file_path.name}] {processed_count}/{total_unique_terms} Terme verarbeitet")
out_df = pd.DataFrame(output_rows)
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}{file_path.suffix}"
version = 1
while output_file.exists():
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}_({version}){file_path.suffix}"
version += 1
engine = "odf" if output_file.suffix.lower()==".ods" else None
try:
out_df.to_excel(output_file, index=False, engine=engine)
logger.info(f"[{file_path.name}] Auswertung gespeichert: {output_file}")
except Exception as e:
logger.error(f"[{file_path.name}] Fehler beim Speichern der Auswertung {output_file}: {e}")
continue
export_missing_terms(out_df, output_file)
mark_norm_hits(output_file)
file_elapsed = time.time() - file_start
logger.info(f"[Datei {file_idx}/{len(files)}] Fertig ({file_elapsed:.1f}s)")
overall_elapsed = time.time() - overall_start
logger.info(f"Fertig. Gesamt: {total_terms} Begriffe, {total_hits} Treffer im Normvokabular. Gesamtzeit: {overall_elapsed:.1f}s")
# =========================
# Main
# =========================
if __name__ == "__main__":
try:
process_files()
except KeyboardInterrupt:
logger.warning("Abbruch durch Benutzer (KeyboardInterrupt).")
except SystemExit:
logger.warning("SystemExit aufgetreten.")
except Exception as e:
logger.exception(f"Ungefangener Fehler: {e}")
finally:
# Stop logger (flush remaining logs)
try:
save_cache()
except Exception:
pass
try:
logger.info("Beende.")
logger.stop()
except Exception:
pass

View File

@ -0,0 +1,46 @@
import subprocess
import json
import sys
from pathlib import Path
def run_mapper(term):
"""
Ruft das bestehende mapper script auf und liefert Vorschläge zurück.
Erwartet, dass das mapper script eine JSON-Ausgabe liefert:
{
"term": "Begriff",
"norm_name": "Normierter Treffer oder KEIN TREFFER",
"norm_id": "ID",
"suggestions": ["Vorschlag1", "Vorschlag2", "Vorschlag3"]
}
"""
mapper_script = Path("/home/jarnold/projects/GND-Skript Test/NormVokabular_Mapper_1.2.py") # dein bestehendes Mapper-Skript
if not mapper_script.exists():
raise FileNotFoundError(f"{mapper_script} nicht gefunden")
# Übergabe als JSON-String
input_json = json.dumps({"term": term})
# Aufruf via subprocess
result = subprocess.run(
[sys.executable, str(mapper_script), input_json],
capture_output=True,
text=True
)
if result.returncode != 0:
raise RuntimeError(f"Mapper Fehler: {result.stderr}")
try:
output = json.loads(result.stdout)
except Exception as e:
raise ValueError(f"Ungültige Ausgabe vom Mapper: {e}")
return output
if __name__ == "__main__":
if len(sys.argv) > 1:
term = sys.argv[1]
output = run_mapper(term)
print(json.dumps(output, ensure_ascii=False))
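# Example invocation (assumption: illustrative only; none of the mapper scripts in this
# commit currently parse a JSON term argument or print JSON, so the protocol described
# in the docstring is the intended interface rather than working behaviour):
#   python3 <this_script>.py "Wappen"
#   -> {"term": "Wappen", "norm_name": "...", "norm_id": "...", "suggestions": [...]}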

Binary file not shown.

BIN
Test API.ods Normal file

Binary file not shown.

101
Tryout/NVTest.py Normal file
View File

@ -0,0 +1,101 @@
import pandas as pd
import requests
import time
import os
def match_gnd(token, delay=0.3):
"""GND-Abfrage für ein Schlagwort, gibt erstes Ergebnis zurück"""
url = f"https://lobid.org/gnd/search?q={token}&format=json"
try:
resp = requests.get(url, timeout=5)
if resp.status_code == 200:
data = resp.json()
if 'member' in data and data['member']:
first = data['member'][0]
return first.get('preferredName'), first.get('gndIdentifier')
except Exception as e:
print(f"Fehler bei GND-Abfrage für '{token}': {e}")
time.sleep(delay)
return None, None
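# Usage sketch (illustrative values only, not a real lookup result):
#   match_gnd("Wappen")  -> ("Wappen", "1234567-8")   # preferredName, gndIdentifier
#   match_gnd("qqqq999") -> (None, None)              # no 'member' hits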
def load_exlibris_refs(path):
"""CSV einlesen, Scan-Zuordnung, Platzhalter-Inventarnummer, GND-Abgleich"""
df = pd.read_csv(path, dtype=str, header=0)
    # erste Spalte leer oder automatisch benannt ("Unnamed: 0")? → "Kürzel"
    if df.columns[0].strip() == '' or str(df.columns[0]).startswith('Unnamed'):
df.rename(columns={df.columns[0]: 'Kürzel'}, inplace=True)
df.fillna('', inplace=True)
# Scan-Level-Spalten
level_cols = [c for c in df.columns if c.strip() in ['0','1','2','3','4']]
obj_list = []
current_obj = None
placeholder_counter = 1
for _, row in df.iterrows():
has_0 = row['0'].strip() if '0' in df.columns else ''
row_refs = []
for c in level_cols:
val = row[c].strip()
if val:
row_refs.append({'level': c, 'scan_ref': val})
if has_0:
if current_obj:
obj_list.append(current_obj)
core_data = {col: row[col] for col in df.columns if col not in level_cols}
# Inventarnummer prüfen
inv = core_data.get('Inventarnummer','').strip()
if not inv:
core_data['Inventarnummer'] = f'PL-{placeholder_counter:04d}'
placeholder_counter += 1
# GND-Abgleich
obj_descr = core_data.get('Objektbeschreibung','')
gnd_name, gnd_id = None, None
if obj_descr:
tokens = [t.strip() for t in obj_descr.split(',') if t.strip()]
for t in tokens:
name, gid = match_gnd(t)
if gid:
gnd_name = name
gnd_id = gid
break
core_data['GND_Name'] = gnd_name
core_data['GND_ID'] = gnd_id
current_obj = core_data
current_obj['ScanReferenzen'] = row_refs
else:
if current_obj:
current_obj['ScanReferenzen'].extend(row_refs)
if current_obj:
obj_list.append(current_obj)
out_df = pd.DataFrame(obj_list)
core_fields = ['Kürzel','Inventarnummer','Standort','Jahr','Urheber','Eigner',
'Objektbeschreibung','Material','Maße (in cm)',
'Objekttyp','Inschrift','Anmerkungen','ScanReferenzen',
'GND_Name','GND_ID']
available = [c for c in core_fields if c in out_df.columns]
return out_df[available]
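# Grouping sketch (invented rows, assumption): a value in scan column '0' starts a new
# object; values in columns '1'-'4' on the same or following rows are appended to that
# object's ScanReferenzen, and objects without Inventarnummer receive the placeholders
# PL-0001, PL-0002, ...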
# ====================
# Hauptteil
# ====================
if __name__ == "__main__":
# CSV im gleichen Ordner suchen
csv_files = [f for f in os.listdir('.') if f.lower().endswith('.csv')]
if not csv_files:
print("Keine CSV-Datei im aktuellen Ordner gefunden.")
exit(1)
# nimm die erste gefundene CSV
input_csv = csv_files[0]
print(f"Verwende CSV-Datei: {input_csv}")
df = load_exlibris_refs(input_csv)
# Ergebnis als Testergebnis.csv speichern
output_file = "Testergebnis.csv"
df.to_csv(output_file, index=False)
print(f"Aufbereitete Daten gespeichert als {output_file}")

190
VLG.py Normal file
View File

@ -0,0 +1,190 @@
#!/usr/bin/env python3
"""
VLG_AAT.py Gruppierung, Auflösung "Objektbeschreibung"
NOCH OHNE AAT-ABGLEICH
- Prüft ezodf in aktueller Umgebung
- Liest ODS aus "Input CSV/"
- Extrahiert Begriffe aus "Objektbeschreibung"
- Lemmatisierung (Spacy) + Stopwortfilter
- Subtokenisierung komplexer Phrasen
- Zählt Häufigkeiten
- Ausgabe ODS / CSV-Fallback in "Auswertung Ergebnisse"
"""
import os
import sys
import logging
from collections import Counter
import pandas as pd
import spacy
# ---------------------------
# Logging
# ---------------------------
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
# ---------------------------
# ezodf prüfen
# ---------------------------
try:
import ezodf
EZODF_AVAILABLE = True
logging.info(f"ezodf erkannt")
except ImportError:
EZODF_AVAILABLE = False
logging.error("ezodf konnte nicht importiert werden!")
logging.error("Möglicherweise nutzen Sie nicht die Python-Umgebung, in der ezodf installiert ist.")
logging.error(f"Aktuelle Python-Executable: {sys.executable}")
logging.error("Bitte prüfen Sie Ihre venv oder installieren Sie ezodf in dieser Umgebung:")
logging.error(" python -m pip install ezodf")
sys.exit(1)
# ---------------------------
# Spacy laden
# ---------------------------
try:
nlp = spacy.load("de_core_news_sm")
logging.info("Spacy-Modell geladen.")
except Exception as e:
logging.error(f"Spacy-Modell konnte nicht geladen werden: {e}")
sys.exit(1)
# ---------------------------
# Konfiguration
# ---------------------------
INPUT_FOLDER = "Input CSV"
OUTPUT_FOLDER = "Auswertung Ergebnisse"
INPUT_FILENAME = None
TARGET_COLUMN = "Objektbeschreibung"
STOPWORDS = {"mit", "auf", "von", "und", "der", "die", "das"} # erweiterbar
MAPPING = { # Projektinterne Sonderfälle
"exlibris": "exlibris",
"wappen": "wappen"
}
# ---------------------------
# Funktionen
# ---------------------------
def find_input_file(folder: str, filename_hint: str = None):
if not os.path.isdir(folder):
raise FileNotFoundError(f"Input-Ordner '{folder}' existiert nicht.")
files = [f for f in os.listdir(folder) if f.lower().endswith(".ods")]
if filename_hint:
for f in files:
if f == filename_hint or filename_hint in f:
return os.path.join(folder, f)
if not files:
raise FileNotFoundError(f"Keine .ods-Dateien in '{folder}' gefunden.")
return os.path.join(folder, files[0])
def read_ods_first_sheet(path: str) -> pd.DataFrame:
"""Lädt ODS, erkennt automatisch Header-Zeile."""
try:
df = pd.read_excel(path, engine="odf", header=None)
logging.info("ODS mit pandas + odfpy geladen.")
except Exception as e1:
logging.warning(f"pandas + odfpy konnte ODS nicht lesen ({e1}).")
if not EZODF_AVAILABLE:
raise RuntimeError("ezodf nicht installiert und pandas + odfpy fehlgeschlagen.")
doc = ezodf.opendoc(path)
sheet = doc.sheets[0]
data = []
for row in sheet.rows():
values = [c.value if hasattr(c, "value") else "" for c in row]
data.append(values)
df = pd.DataFrame(data)
logging.info("ODS mit ezodf geladen.")
# Header-Zeile automatisch finden
header_row_index = None
for i, row in df.iterrows():
row_str = row.fillna("").astype(str).str.lower()
if any("objektbeschreibung" in str(cell) for cell in row_str):
header_row_index = i
break
if header_row_index is None:
raise KeyError("Keine Header-Zeile mit 'Objektbeschreibung' gefunden.")
df.columns = df.iloc[header_row_index]
df = df.iloc[header_row_index + 1:].reset_index(drop=True)
return df
def tokenize_and_lemmatize(series: pd.Series) -> list:
"""Tokenisiert, entfernt Stopwords, wendet Mapping + Spacy-Lemmatisierung an."""
series = series.fillna("").astype(str).str.strip().str.lower()
all_terms = []
for text in series:
if not text:
continue
# Komma-Split
for part in [p.strip() for p in text.split(",") if p.strip()]:
# Subtokenisierung via Spacy
doc = nlp(part)
for token in doc:
lemma = token.lemma_.lower()
if lemma in STOPWORDS:
continue
lemma = MAPPING.get(lemma, lemma)
if lemma:
all_terms.append(lemma)
return all_terms
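# Example (assumption: lemmas depend on the loaded spaCy model, so exact output may
# differ): "Wappen mit Löwe, Exlibris" is split at the comma, spaCy-tokenised,
# stopwords such as "mit" are dropped, and the remaining lowercased lemmas
# (e.g. "wappen", "löwe", "exlibris") end up in one flat list.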
def write_output(rows: list, outpath: str):
if EZODF_AVAILABLE:
if not rows:
logging.warning("Keine Daten zum Schreiben.")
return
keys = list(rows[0].keys())
doc = ezodf.newdoc(doctype="ods", filename=outpath)
sheet = ezodf.Sheet("Auswertung", size=(len(rows)+1, len(keys)))
doc.sheets += sheet
for ci, k in enumerate(keys):
sheet[0, ci].set_value(k)
for ri, row in enumerate(rows, start=1):
for ci, k in enumerate(keys):
sheet[ri, ci].set_value(row.get(k, ""))
doc.save()
logging.info(f"ODS geschrieben: {outpath}")
else:
csv_path = os.path.splitext(outpath)[0] + ".csv"
df = pd.DataFrame(rows)
df.to_csv(csv_path, index=False, sep=";", encoding="utf-8")
logging.info(f"CSV-Fallback geschrieben: {csv_path}")
# ---------------------------
# Hauptfunktion
# ---------------------------
def main(input_folder=INPUT_FOLDER, input_filename=INPUT_FILENAME):
input_path = find_input_file(input_folder, filename_hint=input_filename)
input_basename = os.path.splitext(os.path.basename(input_path))[0]
logging.info(f"Verarbeite Datei: {input_path}")
df = read_ods_first_sheet(input_path)
logging.info(f"Geladene Spalten: {list(df.columns)}")
if TARGET_COLUMN.lower() not in [str(c).lower() for c in df.columns]:
raise KeyError(f"Spalte '{TARGET_COLUMN}' nicht gefunden.")
terms = tokenize_and_lemmatize(df[TARGET_COLUMN])
logging.info(f"Gefundene Begriffe: {len(terms)}")
counts = Counter(terms)
sorted_terms = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)
rows = [{"Begriff": term, "Anzahl": freq} for term, freq in sorted_terms]
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
out_name = f"{input_basename} Auswertung.ods"
out_path = os.path.join(OUTPUT_FOLDER, out_name)
write_output(rows, out_path)
logging.info("Fertig.")
if __name__ == "__main__":
argv = sys.argv[1:]
folder = INPUT_FOLDER
fname = INPUT_FILENAME
if len(argv) >= 1:
folder = argv[0]
if len(argv) >= 2:
fname = argv[1]
main(input_folder=folder, input_filename=fname)

262
VLG_API_multi.py Normal file
View File

@ -0,0 +1,262 @@
import os
import sys
import time
import json
import requests
import pandas as pd
from pathlib import Path
from difflib import SequenceMatcher
import argparse
# =========================
# Argumente / Dry-Run
# =========================
parser = argparse.ArgumentParser()
parser.add_argument('--dry-run', action='store_true', help='API-Abfragen simulieren')
args = parser.parse_args()
DRY_RUN = args.dry_run
# =========================
# Konfiguration
# =========================
INPUT_DIR = Path("Input CSV")
OUTPUT_DIR = Path("Auswertung Ergebnisse")
OUTPUT_DIR.mkdir(exist_ok=True)
TIMEOUT = 5
MAX_RETRIES = 3
BACKOFF_FACTOR = 2
MAX_CONSECUTIVE_FAILURES = 10
CACHE_FILE = "api_cache.json"
if os.path.exists(CACHE_FILE):
with open(CACHE_FILE, "r", encoding="utf-8") as f:
CACHE = json.load(f)
else:
CACHE = {}
API_ACTIVE = {"gnd": True, "wikidata": True}
FAIL_COUNTER = {"gnd": 0, "wikidata": 0}
HEADERS = {"User-Agent": "VLG_API_multi/1.0 (projekt@example.com)"}
# =========================
# Logging
# =========================
def log(level, msg):
print(f"[{level}] {msg}")
# =========================
# Cache speichern
# =========================
def save_cache():
with open(CACHE_FILE, "w", encoding="utf-8") as f:
json.dump(CACHE, f, indent=2, ensure_ascii=False)
# =========================
# Request mit Retry & Backoff
# =========================
def request_with_retries(api_name, url, params=None):
if DRY_RUN:
return {"dummy": True}
if not API_ACTIVE[api_name]:
return None
cache_key = url + (str(params) if params else "")
if cache_key in CACHE:
return CACHE[cache_key]
retries = 0
while retries < MAX_RETRIES:
try:
r = requests.get(url, params=params, timeout=TIMEOUT, headers=HEADERS)
if r.status_code == 200:
try:
data = r.json()
except:
data = r.text
CACHE[cache_key] = data
save_cache()
FAIL_COUNTER[api_name] = 0
return data
elif r.status_code in [403, 429]:
log("ERROR", f"{api_name.upper()} HTTP {r.status_code} Stopschalter aktiviert")
API_ACTIVE[api_name] = False
return None
else:
log("ERROR", f"{api_name.upper()} HTTP {r.status_code}")
except requests.exceptions.Timeout:
log("ERROR", f"Timeout bei {api_name.upper()}")
except Exception as e:
log("ERROR", f"Fehler bei {api_name.upper()}: {e}")
retries += 1
sleep_time = min(BACKOFF_FACTOR ** retries, 30)
time.sleep(sleep_time)
FAIL_COUNTER[api_name] += 1
if FAIL_COUNTER[api_name] >= MAX_CONSECUTIVE_FAILURES:
log("CRITICAL", f"{MAX_CONSECUTIVE_FAILURES} Fehler bei {api_name.upper()} Stopschalter aktiviert")
API_ACTIVE[api_name] = False
return None
# =========================
# API-Abfragen mit Confidence
# =========================
def query_gnd(term, min_conf=0.6):
if DRY_RUN or not API_ACTIVE["gnd"]:
return "TEST_GND", 1.0
url = f"https://lobid.org/gnd/search?q={term}&format=json"
data = request_with_retries("gnd", url)
if not data:
return "API nicht erreichbar", 0.0
results = []
scores = []
for doc in data.get("member", []):
name = doc.get("preferredName", "")
conf = SequenceMatcher(None, term.lower(), name.lower()).ratio()
if conf >= min_conf:
results.append(name)
scores.append(conf)
if results:
return ", ".join(results), max(scores)
return "ohne Ergebnis", 0.0
def query_wikidata(term, min_conf=0.5):
    if DRY_RUN or not API_ACTIVE["wikidata"]:
        return "TEST_WD", 1.0
    url = "https://www.wikidata.org/w/api.php"
    params = {"action": "wbsearchentities", "search": term, "language": "de", "format": "json"}
    data = request_with_retries("wikidata", url, params)
    if not data:
        return "API nicht erreichbar", 0.0
    results = []
    scores = []
    for entry in data.get("search", []):
        label = entry.get("label", "")
        if not label:
            continue
        # wbsearchentities returns no numeric score, so the confidence is
        # estimated via string similarity to the search term (as for GND)
        score = SequenceMatcher(None, term.lower(), label.lower()).ratio()
        if score >= min_conf:
            results.append(label)
            scores.append(score)
    if results:
        return ", ".join(results), max(scores)
    return "ohne Ergebnis", 0.0
# =========================
# Input laden
# =========================
def load_input_file(file_path):
try:
if file_path.suffix.lower() == ".ods":
df = pd.read_excel(file_path, engine="odf", header=None)
elif file_path.suffix.lower() == ".xlsx":
df = pd.read_excel(file_path, engine="openpyxl", header=None)
elif file_path.suffix.lower() == ".csv":
df = pd.read_csv(file_path, header=None)
else:
log("WARNING", f"Unbekanntes Dateiformat: {file_path.name}")
return None
return df
except Exception as e:
log("ERROR", f"Fehler beim Laden von {file_path.name}: {e}")
return None
# =========================
# Header-Zeile suchen
# =========================
def find_header_row(df, keywords=["objektbeschreibung", "objekt/ebene"]):
for i, row in df.iterrows():
row_lower = [str(cell).lower() if pd.notna(cell) else "" for cell in row]
if any(kw in cell for kw in keywords for cell in row_lower):
return i, row_lower
return None, None
# =========================
# Verarbeitung
# =========================
def process_files():
all_terms = []
output_rows = []
for file_path in INPUT_DIR.glob("*"):
if not file_path.suffix.lower() in [".csv", ".xlsx", ".ods"]:
continue
log("INFO", f"Verarbeite {file_path.name}")
df = load_input_file(file_path)
if df is None:
continue
header_idx, header_row = find_header_row(df)
if header_idx is None:
log("WARNING", f"Keine Header-Zeile gefunden in {file_path.name}")
continue
df.columns = header_row
df = df.iloc[header_idx+1:].reset_index(drop=True)
col_objdesc = next((col for col in df.columns if "objektbeschreibung" in str(col).lower()), None)
col_objlevel = next((col for col in df.columns if "objekt/ebene" in str(col).lower()), None)
if not col_objdesc:
log("WARNING", f"Keine Spalte 'Objektbeschreibung' in {file_path.name}")
continue
term_list = []
obj_level_list = []
for _, row in df.iterrows():
terms = str(row[col_objdesc]) if pd.notna(row[col_objdesc]) else ""
if not terms:
continue
for term in [t.strip() for t in terms.split(",") if t.strip()]:
term_list.append(term)
obj_level_list.append(row[col_objlevel] if col_objlevel and pd.notna(row[col_objlevel]) else "")
# API-Abfragen
gnd_results = []
gnd_scores = []
wikidata_results = []
wikidata_scores = []
for term in term_list:
gnd_res, gnd_conf = query_gnd(term)
wikidata_res, wd_conf = query_wikidata(term)
gnd_results.append(gnd_res)
gnd_scores.append(gnd_conf)
wikidata_results.append(wikidata_res)
wikidata_scores.append(wd_conf)
for idx, term in enumerate(term_list):
output_rows.append({
"Begriff": term,
"Quelle": file_path.name,
"Objekt/Ebene": obj_level_list[idx],
"GND": gnd_results[idx],
"GND_Confidence": gnd_scores[idx],
"Wikidata": wikidata_results[idx],
"Wikidata_Confidence": wikidata_scores[idx]
})
all_terms.extend(term_list)
# Hauptoutput
out_df = pd.DataFrame(output_rows)
out_file = OUTPUT_DIR / "Auswertung_gesamt.ods"
out_df.to_excel(out_file, index=False, engine="odf")
log("INFO", f"Hauptauswertung gespeichert: {out_file}")
# Rohdatei
raw_terms = pd.Series(all_terms).value_counts().reset_index()
raw_terms.columns = ["Begriff", "Häufigkeit"]
raw_file = OUTPUT_DIR / "Rohbegriffe.ods"
raw_terms.to_excel(raw_file, index=False, engine="odf")
log("INFO", f"Rohbegriffe gespeichert: {raw_file}")
# =========================
# Main
# =========================
if __name__ == "__main__":
if not INPUT_DIR.exists():
log("CRITICAL", f"Eingabeordner {INPUT_DIR} fehlt!")
sys.exit(1)
process_files()

2815369
api_cache.json Normal file

File diff suppressed because it is too large Load Diff

9
config.json Normal file
View File

@ -0,0 +1,9 @@
{
"normvokabular_path": "/home/jarnold/projects/GND-Skript Test/Input CSV/Normvokabular_INTERN/NV_MASTER.ods",
"max_suggestions": 3,
"color_hit": "#C6EFCE",
"color_miss": "#FFC7CE",
"use_rapidfuzz": false,
"use_spacy": false,
"autosave": false
}

371
mapper.py Normal file
View File

@ -0,0 +1,371 @@
import os
import sys
import re
import time
import json
import pandas as pd
import requests
from pathlib import Path
from collections import defaultdict
from difflib import SequenceMatcher
# RapidFuzz für Token-basierte Fuzzy-Suche
try:
from rapidfuzz import fuzz
RAPIDFUZZ_AVAILABLE = True
print("RapidFuzz verfügbar")
except ImportError:
RAPIDFUZZ_AVAILABLE = False
print("RapidFuzz nicht verfügbar nutze SequenceMatcher")
# Spacy Lemmatizer
try:
import spacy
nlp = spacy.load("de_core_news_sm")
SPACY_AVAILABLE = True
print("Spacy Lemmatizer aktiviert")
except Exception:
    SPACY_AVAILABLE = False
    nlp = None
    print("Spacy nicht verfügbar - nutze naiven Stemmer")
# =========================
# Pfade & Config
# =========================
INPUT_DIR = Path("Input CSV")
OUTPUT_DIR = Path("Auswertung Ergebnisse")
OUTPUT_DIR.mkdir(exist_ok=True)
NORMVOC_FILE = Path("Input CSV/Normvokabular_INTERN/NV_MASTER.ods")
CACHE_FILE = "api_cache.json"
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
CONF_THRESHOLD = 0.75
TIMEOUT = 5
MAX_RETRIES = 3
BACKOFF_FACTOR = 2
HEADERS = {"User-Agent": "VLG_API_multi/1.0 (projekt@example.com)"}
API_ACTIVE = {"gnd": True, "wikidata": True}
FAIL_COUNTER = {"gnd": 0, "wikidata": 0}
# Cache
if os.path.exists(CACHE_FILE):
with open(CACHE_FILE,"r",encoding="utf-8") as f:
CACHE = json.load(f)
else:
CACHE = {}
def save_cache():
with open(CACHE_FILE,"w",encoding="utf-8") as f:
json.dump(CACHE, f, indent=2, ensure_ascii=False)
# =========================
# Normalisierung / Lemma
# =========================
def normalize_text(s):
if not s:
return ""
s = str(s).lower().strip()
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
s = re.sub(r"\s+"," ",s)
return s
# Lemma-Cache
lemma_cache = {}
def lemmatize_term(term):
term_norm = normalize_text(term)
if term_norm in lemma_cache:
return lemma_cache[term_norm]
if SPACY_AVAILABLE and nlp:
doc = nlp(term_norm)
lemma = " ".join([token.lemma_ for token in doc])
else:
lemma = term_norm
lemma_cache[term_norm] = lemma
return lemma
# =========================
# Kompositum-Zerlegung (einfacher Ansatz)
# =========================
def compound_split(term):
parts = re.findall(r'[A-ZÄÖÜ][a-zäöü]+', term)
return parts if parts else [term]
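# Examples of this capital-letter based split (note: it only separates parts that are
# themselves capitalised, so ordinary German compounds stay in one piece):
#   compound_split("WappenSchild") -> ["Wappen", "Schild"]
#   compound_split("Wappenschild") -> ["Wappenschild"]
#   compound_split("wappenschild") -> ["wappenschild"]   (no regex match, fallback)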
# =========================
# Normvokabular laden & Lemma vorbereiten
# =========================
def load_normvokabular(file_path):
sheets = pd.read_excel(file_path, sheet_name=None, engine="odf" if file_path.suffix.lower()==".ods" else None)
norm_dict = {}
stem_index = defaultdict(list)
lemma_norm_map = {} # für RapidFuzz preprocessed
for sheet_name, df in sheets.items():
if sheet_name.lower() in ["master", "übersicht"]:
continue
df = df.dropna(how="all", axis=1)
df.columns = [str(c).strip() for c in df.columns]
id_col = next((c for c in df.columns if "ID" in c), None)
word_col = next((c for c in df.columns if "Wort" in c or "Vokabel" in c), None)
if not id_col or not word_col:
continue
current_parent_id = None
for _, row in df.iterrows():
row_id = str(row[id_col]).strip() if pd.notna(row[id_col]) else None
row_word = str(row[word_col]).strip() if pd.notna(row[word_col]) else None
if row_id:
current_parent_id = row_id
if not row_word:
continue
assigned_parent_id = current_parent_id
entry = {
"Name": row_word,
"ID": assigned_parent_id, # Parent-ID
"Sheet": sheet_name,
"Own_ID": row_id or "" # eigene ID, falls vorhanden
}
key = normalize_text(row_word)
norm_dict[key] = entry
lemma = lemmatize_term(key)
stem_index[lemma].append(entry)
if lemma not in lemma_norm_map:
lemma_norm_map[lemma] = entry
return norm_dict, stem_index, lemma_norm_map
# =========================
# Mapping & Vorschläge
# =========================
def map_to_norm(term, norm_dict, stem_index, lemma_norm_map, top_n=3):
term_norm = normalize_text(term)
term_lemma = lemmatize_term(term)
# Exakter Treffer
if term_norm in norm_dict:
e = norm_dict[term_norm]
return e["Name"], e["ID"], []
# Lemma-Treffer
if term_lemma in stem_index:
e = stem_index[term_lemma][0]
return e["Name"], e["ID"], []
# KEIN TREFFER → Kompositum-Split
tokens = compound_split(term)
if len(tokens) == 1:
suggestions = get_suggestions(term_lemma, lemma_norm_map, top_n)
return "KEIN TREFFER", "", suggestions
else:
token_matches = []
for t in tokens:
t_lemma = lemmatize_term(t)
if t_lemma in stem_index:
e = stem_index[t_lemma][0]
token_matches.append((t, e["Name"], e["ID"]))
else:
sugg = get_suggestions(t_lemma, lemma_norm_map, top_n)
token_matches.append((t, "KEIN TREFFER", "", sugg))
combined_suggestions = [f"{m[1]} ({m[2]})" for m in token_matches if m[1] != "KEIN TREFFER"]
return "KEIN TREFFER", "", combined_suggestions
def get_suggestions(term_lemma, lemma_norm_map, top_n=3, threshold=CONF_THRESHOLD):
candidates = []
for key_lemma, entry in lemma_norm_map.items():
if RAPIDFUZZ_AVAILABLE:
score = fuzz.token_set_ratio(term_lemma, key_lemma)/100
else:
score = SequenceMatcher(None, term_lemma.lower(), key_lemma.lower()).ratio()
if key_lemma.lower().startswith(term_lemma.lower()):
score = min(score + 0.1, 1.0)
if score >= threshold:
candidates.append((score, entry["Name"], entry["ID"]))
candidates.sort(reverse=True)
return [f"{name} ({id_})" for _, name, id_ in candidates[:top_n]]
# =========================
# API-Abfragen
# =========================
def request_with_retries(api_name,url,params=None):
cache_key = url + str(params)
if cache_key in CACHE:
return CACHE[cache_key]
retries = 0
while retries < MAX_RETRIES:
try:
r = requests.get(url, params=params, timeout=TIMEOUT, headers=HEADERS)
if r.status_code == 200:
                try:
                    data = r.json()
                except ValueError:
                    data = r.text
                CACHE[cache_key] = data
                FAIL_COUNTER[api_name] = 0
                return data
        except Exception:
            pass
retries += 1
time.sleep(min(BACKOFF_FACTOR**retries,30))
FAIL_COUNTER[api_name] += 1
if FAIL_COUNTER[api_name] >= 10:
API_ACTIVE[api_name] = False
return None
def batch_query_gnd(terms):
results={}
if not API_ACTIVE.get("gnd", False):
for t in terms: results[t] = ""
return results
for t in terms:
url="https://lobid.org/gnd/search"
params={"q":t,"format":"json"}
data = request_with_retries("gnd", url, params)
top = ""
if data and "member" in data:
cands = [(doc.get("preferredName","") or doc.get("name",""), SequenceMatcher(None,t.lower(),(doc.get("preferredName","") or doc.get("name","")).lower()).ratio()) for doc in data["member"] if doc.get("preferredName","") or doc.get("name","")]
cands = [c for c in cands if c[1]>=0.75]
if cands:
top = sorted(cands,key=lambda x:x[1],reverse=True)[0][0]
results[t] = top
return results
def batch_query_wikidata(terms):
results={}
if not API_ACTIVE.get("wikidata", False):
for t in terms: results[t] = ""
return results
for t in terms:
url="https://www.wikidata.org/w/api.php"
params={"action":"wbsearchentities","search":t,"language":"de","format":"json"}
data = request_with_retries("wikidata", url, params)
top = ""
if data and "search" in data:
cands = [(e.get("label",""), SequenceMatcher(None,t.lower(),e.get("label","").lower()).ratio()) for e in data["search"] if e.get("label","")]
cands = [c for c in cands if c[1]>=0.70]
if cands:
top = sorted(cands,key=lambda x:x[1],reverse=True)[0][0]
results[t] = top
return results
# =========================
# Markierung / Export
# =========================
def mark_norm_hits(file_path):
ext = file_path.suffix.lower()
if ext in [".xlsx", ".xls"]:
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
wb = load_workbook(file_path)
ws = wb.active
green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
col_map = {cell.value: idx+1 for idx, cell in enumerate(ws[1])}
norm_col = col_map.get("Norm_Treffer", None)
if not norm_col:
print("Spalte 'Norm_Treffer' nicht gefunden")
return
for row in ws.iter_rows(min_row=2, min_col=norm_col, max_col=norm_col):
cell = row[0]
if cell.value and cell.value != "KEIN TREFFER":
cell.fill = green_fill
else:
cell.fill = red_fill
wb.save(file_path)
elif ext==".ods":
df = pd.read_excel(file_path, engine="odf")
df["Norm_Status"] = df["Norm_Treffer"].apply(lambda x: "Treffer" if pd.notna(x) and str(x).strip() and x!="KEIN TREFFER" else "Kein Treffer")
df.to_excel(file_path, index=False, engine="odf")
# =========================
# Verarbeitung Input-Dateien
# =========================
def process_files():
norm_dict, stem_index, lemma_norm_map = load_normvokabular(NORMVOC_FILE)
total_terms = 0
total_hits = 0
if not INPUT_DIR.exists():
print(f"Eingabeordner {INPUT_DIR} fehlt")
sys.exit(1)
files = list(INPUT_DIR.glob("*"))
if not files:
print("Keine Dateien gefunden")
return
for file_path in files:
if not file_path.suffix.lower() in [".csv",".ods",".xls",".xlsx"]:
continue
print(f"Verarbeite Datei: {file_path.name}")
try:
if file_path.suffix.lower() == ".csv":
df = pd.read_csv(file_path)
else:
df = pd.read_excel(file_path, engine="odf" if file_path.suffix.lower()==".ods" else None)
except Exception as e:
print(f"Fehler beim Lesen von {file_path.name}: {e}")
continue
df = df.dropna(how="all")
df.columns = [str(c).strip() for c in df.columns]
besch_col = next((c for c in df.columns if "Objektbeschreibung" in c), None)
box_col = next((c for c in df.columns if "Objekt/Ebene" in c), None)
urh_col = next((c for c in df.columns if "Urheber" in c), None)
if not besch_col: continue
row_terms_map = []
for _, row in df.iterrows():
besch = str(row[besch_col]).strip() if pd.notna(row[besch_col]) else ""
if not besch: continue
obj_box = row[box_col] if box_col else ""
urheber = row[urh_col] if urh_col else ""
clauses = [c.strip() for c in re.split(r",", besch) if c.strip()]
terms = []
for clause in clauses:
parts = [p.strip() for p in re.split(r"\s+", clause) if p.strip()]
for p in parts:
if p.lower() in STOPWORDS: continue
if re.fullmatch(r"\d+", p): continue
terms.append(p)
row_terms_map.append((obj_box, urheber, terms))
all_terms = list({t for _,_,terms in row_terms_map for t in terms})
gnd_results = batch_query_gnd(all_terms)
wd_results = batch_query_wikidata(all_terms)
output_rows = []
for obj_box, urheber, terms in row_terms_map:
for term in terms:
norm_name, norm_id, suggestions = map_to_norm(term, norm_dict, stem_index, lemma_norm_map)
total_terms += 1
if norm_name != "KEIN TREFFER":
total_hits += 1
out_row = {
"Box": obj_box,
"Objekt/Ebene": obj_box,
"Urheber": urheber,
"Begriff": term,
"Norm_Treffer": norm_name,
"Norm_ID": norm_id,
"Norm_Vorschlag": ", ".join(suggestions) if suggestions else "",
"GND_Top1": gnd_results.get(term,""),
"WD_Top1": wd_results.get(term,"")
}
output_rows.append(out_row)
out_df = pd.DataFrame(output_rows)
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}{file_path.suffix}"
version = 1
while output_file.exists():
output_file = OUTPUT_DIR / f"Auswertung_{file_path.stem}_({version}){file_path.suffix}"
version += 1
engine = "odf" if output_file.suffix.lower()==".ods" else None
out_df.to_excel(output_file, index=False, engine=engine)
mark_norm_hits(output_file)
print(f"Auswertung gespeichert: {output_file}")
save_cache()
print(f"Gesamt: {total_terms} Begriffe, {total_hits} Treffer im Normvokabular")
# =========================
# Main
# =========================
if __name__ == "__main__":
process_files()
print("Fertig")

237
mapper_macro.py Normal file
View File

@ -0,0 +1,237 @@
import uno
import os
import re
import traceback
import json
# Optional: spaCy lemmatizer
try:
    import spacy
    nlp = spacy.load("de_core_news_sm")
    SPACY_AVAILABLE = True
except Exception:  # spaCy or the German model may be missing
    SPACY_AVAILABLE = False
    nlp = None
# Optional: fuzzy matching via rapidfuzz, with difflib as fallback
try:
    from rapidfuzz import fuzz
    RAPIDFUZZ_AVAILABLE = True
except Exception:
    from difflib import SequenceMatcher
    RAPIDFUZZ_AVAILABLE = False
import odf.opendocument
import odf.table
import odf.text
# ------------------------
# Konfiguration absolute Pfade
# ------------------------
BASE_DIR = "/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro"
NV_MASTER_PATH = os.path.join(BASE_DIR, "NV_MASTER.ods")
LOG_FILE = os.path.join(BASE_DIR, "mapper_macro.log")
CACHE_FILE = os.path.join(BASE_DIR, "mapper_cache.json")
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
CONF_THRESHOLD = 0.75
# ------------------------
# Logging
# ------------------------
def log(msg):
with open(LOG_FILE, "a", encoding="utf-8") as f:
f.write(msg + "\n")
# ------------------------
# Cache laden
# ------------------------
if os.path.exists(CACHE_FILE):
with open(CACHE_FILE, "r", encoding="utf-8") as f:
CACHE = json.load(f)
else:
CACHE = {}
# ------------------------
# Normalisierung / Lemma
# ------------------------
def normalize_text(s):
if not s:
return ""
s = str(s).lower().strip()
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
s = re.sub(r"\s+"," ",s)
return s
lemma_cache = {}
def lemmatize_term(term):
term_norm = normalize_text(term)
if term_norm in lemma_cache:
return lemma_cache[term_norm]
if SPACY_AVAILABLE and nlp:
doc = nlp(term_norm)
lemma = " ".join([token.lemma_ for token in doc])
else:
lemma = term_norm
lemma_cache[term_norm] = lemma
return lemma
# ------------------------
# NV_MASTER einlesen
# ------------------------
def load_nv_master(path):
norm_dict = {}
try:
doc = odf.opendocument.load(path)
except Exception as e:
log(f"Fehler beim Laden von NV_MASTER: {e}")
return norm_dict
for sheet in doc.spreadsheet.getElementsByType(odf.table.Table):
sheet_name = sheet.getAttribute("name")
if sheet_name.lower() == "master":
continue
current_parent_id = None
for row in sheet.getElementsByType(odf.table.TableRow):
cells = row.getElementsByType(odf.table.TableCell)
cell_values = []
for cell in cells:
texts = cell.getElementsByType(odf.text.P)
if texts and texts[0].firstChild:
cell_values.append(str(texts[0].firstChild.data).strip())
else:
cell_values.append("")
if not cell_values or len(cell_values)<4:
continue
id_val, unterk, unterunterk, word = cell_values[:4]
if id_val:
current_parent_id = id_val.strip()
if not word:
continue
key = lemmatize_term(word)
norm_dict[key] = {
"Name": word.strip(),
"ID": current_parent_id,
"Sheet": sheet_name,
"Unterkategorie": unterk.strip(),
"Unterunterkategorie": unterunterk.strip()
}
log(f"NV_MASTER geladen: {len(norm_dict)} Begriffe")
return norm_dict
# ------------------------
# Matching
# ------------------------
def get_suggestions(term_lemma, norm_dict, top_n=3, threshold=CONF_THRESHOLD):
candidates = []
for key, entry in norm_dict.items():
if RAPIDFUZZ_AVAILABLE:
score = fuzz.token_set_ratio(term_lemma, key)/100
else:
score = SequenceMatcher(None, term_lemma.lower(), key.lower()).ratio()
if key.lower().startswith(term_lemma.lower()):
score = min(score + 0.1, 1.0)
if score >= threshold:
candidates.append((score, entry["Name"], entry["ID"]))
candidates.sort(reverse=True)
return [f"{name} ({id_})" for _, name, id_ in candidates[:top_n]]
def map_word(word, norm_dict):
key = lemmatize_term(word)
if key in CACHE:
cached = CACHE[key]
return cached["Norm"], cached["Suggestion"], cached["ID"]
if key in norm_dict:
entry = norm_dict[key]
tr, sug, wid = entry["Name"], "", entry["ID"]
else:
suggestions = get_suggestions(term_lemma=key, norm_dict=norm_dict)
if suggestions:
tr, sug, wid = "KEIN TREFFER", ", ".join(suggestions), ""
else:
tr, sug, wid = "KEIN TREFFER", "", ""
CACHE[key] = {"Norm": tr, "Suggestion": sug, "ID": wid}
return tr, sug, wid
# ------------------------
# Makro-Hauptfunktion
# ------------------------
def run_mapper_macro():
try:
doc = XSCRIPTCONTEXT.getDocument()
sheets = doc.getSheets()
sheet = sheets.getByIndex(0)
cursor = sheet.createCursor()
cursor.gotoStartOfUsedArea(False)
cursor.gotoEndOfUsedArea(True)
data_range = cursor.getRangeAddress()
header_row = 0
objekt_col = None
# Header prüfen
for col in range(data_range.EndColumn+1):
val = sheet.getCellByPosition(col, header_row).String.strip().lower()
if val == "objektbeschreibung":
objekt_col = col
break
if objekt_col is None:
log("Spalte 'Objektbeschreibung' nicht gefunden")
return
# Neue Spalten am rechten Tabellenende erstellen
max_col = data_range.EndColumn
norm_tr_col = max_col + 1
norm_sug_col = max_col + 2
norm_id_col = max_col + 3
sheet.getCellByPosition(norm_tr_col, header_row).String = "Norm_Treffer"
sheet.getCellByPosition(norm_sug_col, header_row).String = "Norm_Vorschlag"
sheet.getCellByPosition(norm_id_col, header_row).String = "Norm_ID"
norm_dict = load_nv_master(NV_MASTER_PATH)
# Farben
GREEN = 0xC6EFCE
YELLOW = 0xFFEB9C
RED = 0xFFC7CE
for row in range(1, data_range.EndRow+1):
cell = sheet.getCellByPosition(objekt_col, row)
val = cell.String.strip()
if not val:
continue
words = [w.strip() for w in re.split(r"\s+", val) if w.strip() and w.lower() not in STOPWORDS]
tr_list, sug_list, id_list = [], [], []
for w in words:
tr, sug, wid = map_word(w, norm_dict)
if tr != "KEIN TREFFER":
tr_list.append(tr)
if sug:
sug_list.append(sug)
if wid:
id_list.append(wid)
sheet.getCellByPosition(norm_tr_col, row).String = ", ".join(tr_list)
sheet.getCellByPosition(norm_sug_col, row).String = ", ".join(sug_list)
sheet.getCellByPosition(norm_id_col, row).String = ", ".join(id_list)
# Farbmarkierung
if tr_list:
cell.CellBackColor = GREEN
elif sug_list:
cell.CellBackColor = YELLOW
else:
cell.CellBackColor = RED
# Cache speichern
with open(CACHE_FILE, "w", encoding="utf-8") as f:
json.dump(CACHE, f, ensure_ascii=False, indent=2)
log("Makro erfolgreich ausgeführt")
except Exception as e:
log("Fehler in run_mapper_macro:")
log(traceback.format_exc())
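# --- Illustrative sketch (not part of the original macro) ---
# run_mapper_macro() splits every "Objektbeschreibung" cell into words and
# maps each word with map_word(). The helper below performs the same mapping
# for a free-text string without LibreOffice, e.g. for quick testing; the
# function name and the example usage are invented for illustration.
def _demo_map_text(text):
    norm_dict = load_nv_master(NV_MASTER_PATH)
    results = {}
    for w in re.split(r"\s+", text):
        w = w.strip()
        if not w or w.lower() in STOPWORDS:
            continue
        results[w] = map_word(w, norm_dict)  # (hit, suggestion, ID)
    return results
# Example: _demo_map_text("Vase mit Deckel") maps "Vase" and "Deckel".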

448
mapper_macro_1.1.py Normal file
View File

@ -0,0 +1,448 @@
# -*- coding: utf-8 -*-
# LibreOffice Calc macro: NV_MASTER-Abgleich, Pandas+odf, Cache, Farben
# Pfade: BASE_DIR muss auf das Verzeichnis zeigen, in dem NV_MASTER.ods + Makro liegen.
# Speichern: /home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro/mapper_macro.py
import os
import re
import json
import traceback
# UNO-Context wird zur Laufzeit zur Verfügung gestellt (XSCRIPTCONTEXT)
# Third-party libs: pandas, odfpy, optional: spacy, rapidfuzz
try:
import pandas as pd
PANDAS_AVAILABLE = True
except Exception:
PANDAS_AVAILABLE = False
try:
import spacy
nlp = spacy.load("de_core_news_sm")
SPACY_AVAILABLE = True
except Exception:
SPACY_AVAILABLE = False
nlp = None
try:
from rapidfuzz import fuzz
RAPIDFUZZ_AVAILABLE = True
except Exception:
RAPIDFUZZ_AVAILABLE = False
from difflib import SequenceMatcher
# ------------------------
# Konfiguration
# ------------------------
BASE_DIR = "/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro"
NV_MASTER_PATH = os.path.join(BASE_DIR, "NV_MASTER.ods")
LOG_FILE = os.path.join(BASE_DIR, "mapper_macro.log")
CACHE_FILE = os.path.join(BASE_DIR, "mapper_cache.json")
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
CONF_THRESHOLD = 0.75 # Basis-Schwelle für Vorschläge
# ------------------------
# Utilities: Logging & safe I/O
# ------------------------
def log(msg):
try:
with open(LOG_FILE, "a", encoding="utf-8") as f:
f.write(msg + "\n")
except Exception:
pass
# ------------------------
# Cache laden
# ------------------------
try:
if os.path.exists(CACHE_FILE):
with open(CACHE_FILE, "r", encoding="utf-8") as f:
CACHE = json.load(f)
else:
CACHE = {}
except Exception:
CACHE = {}
# ------------------------
# Text-Normalisierung & Lemma
# ------------------------
def normalize_text(s):
if not s:
return ""
s = str(s).strip().lower()
s = re.sub(r"[\(\)\[\]\"'\\;:\?!,\.]", "", s)
s = re.sub(r"\s+", " ", s)
return s
lemma_cache = {}
def lemmatize_term(term):
term_norm = normalize_text(term)
if term_norm in lemma_cache:
return lemma_cache[term_norm]
if SPACY_AVAILABLE and nlp:
try:
doc = nlp(term_norm)
lemma = " ".join([token.lemma_ for token in doc])
except Exception:
lemma = term_norm
else:
lemma = term_norm
lemma_cache[term_norm] = lemma
return lemma
# ------------------------
# NV_MASTER robust laden (pandas + odf)
# ------------------------
def build_norm_index(nv_path):
norm_dict = {} # normalized_name -> list of entries (Name, ID, Sheet)
lemma_index = {} # lemma -> list of entries
if not PANDAS_AVAILABLE:
log("Pandas nicht verfügbar. NV_MASTER kann nicht zuverlässig gelesen werden.")
return norm_dict, lemma_index
try:
sheets = pd.read_excel(nv_path, sheet_name=None, engine="odf")
except Exception as e:
log(f"Fehler beim Einlesen von NV_MASTER mit pandas: {e}")
return norm_dict, lemma_index
for sheet_name, df in sheets.items():
if str(sheet_name).strip().lower() == "master":
continue
# normalize columns names to find ID and Wort columns
df = df.fillna("") # leere Zellen als ""
cols = [str(c).strip().lower() for c in df.columns]
# try to find columns
id_col = None
word_col = None
for i, c in enumerate(cols):
if "id" in c:
id_col = df.columns[i]
if "wort" in c or "vokabel" in c:
word_col = df.columns[i]
# fallback: if not found, try first/last
if word_col is None and len(df.columns) >= 1:
word_col = df.columns[-1]
if id_col is None and len(df.columns) >= 1:
id_col = df.columns[0]
current_parent_id = None
for _, row in df.iterrows():
id_val = str(row[id_col]).strip() if id_col in df.columns else ""
word_val = str(row[word_col]).strip() if word_col in df.columns else ""
# if row defines an ID, set as current parent
if id_val:
current_parent_id = id_val
# skip empty word cells
if not word_val:
continue
norm_name = normalize_text(word_val)
lemma = lemmatize_term(word_val)
entry = {"Name": word_val.strip(), "ID": current_parent_id or "", "Sheet": sheet_name}
# add to norm_dict by normalized name (exact matching)
norm_dict.setdefault(norm_name, []).append(entry)
# add to lemma_index
lemma_index.setdefault(lemma, []).append(entry)
log(f"NV_MASTER geladen ({NV_MASTER_PATH}). Begriffe: {sum(len(v) for v in norm_dict.values())}")
return norm_dict, lemma_index
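# --- Structure note (illustrative, not part of the original macro) ---
# build_norm_index() returns two parallel lookup tables:
#   norm_dict:   normalized name -> list of {"Name", "ID", "Sheet"} entries
#   lemma_index: lemma           -> list of {"Name", "ID", "Sheet"} entries
# e.g. norm_dict["vase"] == [{"Name": "Vase", "ID": "2.1", "Sheet": "2 Objekt"}]
# (the sample entry is invented; real content comes from NV_MASTER.ods).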
# ------------------------
# Matching: exakter Treffer, Lemma-Treffer, Fuzzy-Vorschläge
# ------------------------
def fuzzy_score(a, b):
if RAPIDFUZZ_AVAILABLE:
try:
return fuzz.token_set_ratio(a, b) / 100.0
except Exception:
return 0.0
else:
try:
return SequenceMatcher(None, a.lower(), b.lower()).ratio()
except Exception:
return 0.0
def get_suggestions_for_term(term_lemma, norm_dict, lemma_index, top_n=None, threshold=CONF_THRESHOLD):
# collect candidates from lemma_index keys and norm_dict keys
candidates = []
# iterate over lemma_index keys for candidate names
for key_lemma, entries in lemma_index.items():
score = fuzzy_score(term_lemma, key_lemma)
if key_lemma.startswith(term_lemma):
score = min(score + 0.1, 1.0)
if score >= threshold:
for e in entries:
candidates.append((score, e["Name"], e["ID"]))
# also check norm_dict keys (exact-normalized names) as additional candidates
for norm_key, entries in norm_dict.items():
score = fuzzy_score(term_lemma, norm_key)
if norm_key.startswith(term_lemma):
score = min(score + 0.1, 1.0)
if score >= threshold:
for e in entries:
candidates.append((score, e["Name"], e["ID"]))
# sort by score descending
candidates.sort(key=lambda t: t[0], reverse=True)
# unique by (Name, ID) preserve score order
seen = set()
results = []
for score, name, id_ in candidates:
key = (name, id_)
if key in seen:
continue
seen.add(key)
results.append({"score": score, "name": name, "id": id_})
# return all candidates (no limit) as "Name (ID)"
return [f'{r["name"]} ({r["id"]})' if r["id"] else r["name"] for r in results]
def map_term_with_indexes(term, norm_dict, lemma_index):
term_norm = normalize_text(term)
term_lemma = lemmatize_term(term)
# cache lookup
if term_lemma in CACHE:
return CACHE[term_lemma]["hits"], CACHE[term_lemma]["suggestions"], CACHE[term_lemma]["ids"]
hits = []
suggestions = []
ids = []
# 1) exact normalized name match
if term_norm in norm_dict:
for e in norm_dict[term_norm]:
hits.append(e["Name"])
if e["ID"]:
ids.append(e["ID"])
# 2) lemma match (if not already hits)
if not hits and term_lemma in lemma_index:
for e in lemma_index[term_lemma]:
hits.append(e["Name"])
if e["ID"]:
ids.append(e["ID"])
# 3) suggestions via fuzzy (always compute even if hits exist, but suggestions empty if exact)
suggs = get_suggestions_for_term(term_lemma, norm_dict, lemma_index, top_n=None, threshold=CONF_THRESHOLD)
# If there are exact hits, we still may present suggestions (user wanted unlimited), but suggestions are secondary
suggestions = suggs
# deduplicate lists preserving order
def unique_preserve(seq):
seen = set()
out = []
for x in seq:
if x not in seen:
seen.add(x)
out.append(x)
return out
hits = unique_preserve(hits)
suggestions = unique_preserve(suggestions)
ids = unique_preserve(ids)
# cache result
CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}
return hits, suggestions, ids
# ------------------------
# Haupt-Makro
# ------------------------
def run_mapper_macro():
try:
# UNO doc/sheet
doc = XSCRIPTCONTEXT.getDocument()
sheet = doc.CurrentController.ActiveSheet
cursor = sheet.createCursor()
cursor.gotoStartOfUsedArea(False)
cursor.gotoEndOfUsedArea(True)
data_range = cursor.getRangeAddress()
except Exception as e:
log("Fehler: konnte Dokument/Sheet nicht öffnen: " + str(e))
return
# find header row and Objektbeschreibung column (search first 5 rows)
header_row = None
objekt_col = None
max_col = data_range.EndColumn
for r in range(0, min(5, data_range.EndRow+1)):
for c in range(0, max_col+1):
try:
val = str(sheet.getCellByPosition(c, r).String).strip().lower()
except Exception:
val = ""
if val == "objektbeschreibung":
header_row = r
objekt_col = c
break
if objekt_col is not None:
break
if objekt_col is None:
log("Spalte 'Objektbeschreibung' nicht gefunden. Abbruch.")
return
# determine or create result columns: search if exist anywhere; otherwise append at right end
existing = {}
for c in range(0, data_range.EndColumn+1):
try:
h = str(sheet.getCellByPosition(c, header_row).String).strip()
except Exception:
h = ""
if h == "Norm_Treffer":
existing["Norm_Treffer"] = c
if h == "Norm_Vorschlag":
existing["Norm_Vorschlag"] = c
if h == "Norm_ID":
existing["Norm_ID"] = c
# append columns at right end if missing
last_col = data_range.EndColumn
if "Norm_Treffer" not in existing:
last_col += 1
existing["Norm_Treffer"] = last_col
try:
sheet.getCellByPosition(last_col, header_row).String = "Norm_Treffer"
except Exception:
pass
if "Norm_Vorschlag" not in existing:
last_col += 1
existing["Norm_Vorschlag"] = last_col
try:
sheet.getCellByPosition(last_col, header_row).String = "Norm_Vorschlag"
except Exception:
pass
if "Norm_ID" not in existing:
last_col += 1
existing["Norm_ID"] = last_col
try:
sheet.getCellByPosition(last_col, header_row).String = "Norm_ID"
except Exception:
pass
norm_tr_col = existing["Norm_Treffer"]
norm_sug_col = existing["Norm_Vorschlag"]
norm_id_col = existing["Norm_ID"]
# Build norm indexes
norm_dict, lemma_index = build_norm_index(NV_MASTER_PATH)
if not norm_dict and not lemma_index:
log("NV_MASTER leer oder nicht lesbar. Abbruch.")
return
# colors
GREEN = 0xADFF2F
YELLOW = 0xFFA500
RED = 0xCC0000
# iterate rows
rows_processed = 0
for r in range(header_row + 1, data_range.EndRow + 1):
try:
cell = sheet.getCellByPosition(objekt_col, r)
txt = str(cell.String).strip()
if not txt:
# clear any previous outputs? keep existing per spec; skip empty
continue
# tokenize: split by commas first, then whitespace; filter stopwords and pure numbers
clauses = [c.strip() for c in re.split(r",", txt) if c.strip()]
terms = []
for cl in clauses:
parts = [p.strip() for p in re.split(r"\s+", cl) if p.strip()]
for p in parts:
if p.lower() in STOPWORDS:
continue
if re.fullmatch(r"\d+", p):
continue
terms.append(p)
# for each term, get hits/suggestions/ids
row_hits = []
row_sugs = []
row_ids = []
any_unmapped = False # at least one term without hit and without suggestion
# We will record for each term
for term in terms:
hits, sugs, ids = map_term_with_indexes(term, norm_dict, lemma_index)
if hits:
row_hits.extend(hits)
if sugs:
row_sugs.extend(sugs)
if ids:
row_ids.extend(ids)
if (not hits) and (not sugs):
any_unmapped = True
# deduplicate preserving order
def uniq(seq):
seen = set()
out = []
for x in seq:
if x not in seen:
seen.add(x)
out.append(x)
return out
row_hits = uniq(row_hits)
row_sugs = uniq(row_sugs)
row_ids = uniq(row_ids)
# write outputs (unlimited lists, joined with " | ")
try:
sheet.getCellByPosition(norm_tr_col, r).String = " | ".join(row_hits)
sheet.getCellByPosition(norm_sug_col, r).String = " | ".join(row_sugs)
sheet.getCellByPosition(norm_id_col, r).String = " | ".join(row_ids)
except Exception:
pass
# Coloring rules per new spec:
# - Objektbeschreibung cell: RED if any_unmapped else no change (we do not color green/yellow here)
# - Norm_Treffer cell: GREEN if all terms matched (i.e., terms non-empty and no term unmapped and at least one hit per term)
# - Norm_Vorschlag cell: YELLOW if at least one suggestion exists
# Determine "all matched": terms non-empty and every term has at least one hit (we approximated by checking any_unmapped and hits length)
all_matched = False
if terms:
# all_matched if no term without hit and there is at least one hit overall
if (not any_unmapped) and row_hits:
all_matched = True
# apply colors
try:
if any_unmapped:
cell.CellBackColor = RED
else:
# clear red if previously set? We'll leave unchanged if not set. Optionally set to default 16777215 (white)
pass
# Norm_Treffer coloring
tr_cell = sheet.getCellByPosition(norm_tr_col, r)
if all_matched:
tr_cell.CellBackColor = GREEN
else:
# clear color if needed -> set to white
tr_cell.CellBackColor = 0xFFFFFF
# Norm_Vorschlag coloring
sug_cell = sheet.getCellByPosition(norm_sug_col, r)
if row_sugs:
sug_cell.CellBackColor = YELLOW
else:
sug_cell.CellBackColor = 0xFFFFFF
except Exception:
pass
rows_processed += 1
except Exception as e:
# continue processing other rows; log once
log(f"Fehler in Zeile {r}: {e}")
# persist cache
try:
with open(CACHE_FILE, "w", encoding="utf-8") as f:
json.dump(CACHE, f, ensure_ascii=False, indent=2)
except Exception:
pass
log(f"run_mapper_macro fertig. Zeilen verarbeitet: {rows_processed}")
# Export for LO
g_exportedScripts = (run_mapper_macro,)
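# --- Illustrative sketch (not part of the original macro) ---
# get_suggestions_for_term() combines fuzzy_score() with a small prefix bonus
# (+0.1, capped at 1.0) before applying CONF_THRESHOLD. The helper below
# isolates that scoring rule so it can be tested on its own; the function
# name and the example terms are invented for illustration.
def _demo_candidate_score(term_lemma, candidate):
    score = fuzzy_score(term_lemma, candidate)
    if candidate.startswith(term_lemma):
        score = min(score + 0.1, 1.0)  # prefix matches get a small boost
    return score
# Example: _demo_candidate_score("blume", "blumenvase") receives the prefix
# bonus, while _demo_candidate_score("blume", "vase") must clear
# CONF_THRESHOLD on similarity alone.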

297
mapper_macro_1.2.py Normal file
View File

@ -0,0 +1,297 @@
# -*- coding: utf-8 -*-
import os
import uno
import unohelper
import re
import json
import pandas as pd
from pathlib import Path
from difflib import SequenceMatcher
# RapidFuzz für Fuzzy-Suche
try:
from rapidfuzz import fuzz
RAPIDFUZZ_AVAILABLE = True
except ImportError:
RAPIDFUZZ_AVAILABLE = False
# Spacy Lemmatizer
try:
import spacy
nlp = spacy.load("de_core_news_sm")
SPACY_AVAILABLE = True
except Exception:  # spaCy or the German model may be missing
SPACY_AVAILABLE = False
nlp = None
# =========================
# Pfade & Config
# =========================
SCRIPT_DIR = Path("/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro")
NV_MASTER_FILE = SCRIPT_DIR / "NV_MASTER.ods"
CACHE_FILE = SCRIPT_DIR / "mapper_cache.json"
LOG_FILE = SCRIPT_DIR / "mapper_log.txt"
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
# =========================
# Cache & Logging
# =========================
if CACHE_FILE.exists():
with open(CACHE_FILE,"r",encoding="utf-8") as f:
CACHE = json.load(f)
else:
CACHE = {}
def save_cache():
with open(CACHE_FILE,"w",encoding="utf-8") as f:
json.dump(CACHE, f, indent=2, ensure_ascii=False)
def log(msg):
with open(LOG_FILE,"a",encoding="utf-8") as f:
f.write(msg + "\n")
# =========================
# Textverarbeitung
# =========================
def normalize_text(s):
if not s: return ""
s = str(s).lower().strip()
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
s = re.sub(r"\s+"," ",s)
return s
lemma_cache = {}
def lemmatize_term(term):
term_norm = normalize_text(term)
if term_norm in lemma_cache:
return lemma_cache[term_norm]
if SPACY_AVAILABLE and nlp:
doc = nlp(term_norm)
lemma = " ".join([token.lemma_ for token in doc])
else:
lemma = term_norm
lemma_cache[term_norm] = lemma
return lemma
def compound_split(term):
parts = re.findall(r'[A-ZÄÖÜa-zäöü]+', term)
return parts if parts else [term]
# =========================
# NV_MASTER laden
# =========================
def load_normvokabular(file_path):
sheets = pd.read_excel(file_path, sheet_name=None, engine="odf")
norm_dict = {}
for sheet_name, df in sheets.items():
df = df.dropna(how="all", axis=1)
df.columns = [str(c).strip() for c in df.columns]
if "ID" not in df.columns or "Wort/Vokabel" not in df.columns:
continue
current_parent_id = None
for _, row in df.iterrows():
row_id = str(row["ID"]).strip() if pd.notna(row["ID"]) else None
row_word = str(row["Wort/Vokabel"]).strip() if pd.notna(row["Wort/Vokabel"]) else None
if row_id: current_parent_id = row_id
if not row_word: continue
norm_dict[normalize_text(row_word)] = {
"ID": current_parent_id,
"Wort/Vokabel": row_word
}
return norm_dict
# =========================
# Mapping
# =========================
def map_term_with_indexes(term, norm_dict):
term_norm = normalize_text(term)
term_lemma = lemmatize_term(term_norm)
# Cache prüfen
if term_lemma in CACHE:
cached = CACHE[term_lemma]
if isinstance(cached, dict) and all(k in cached for k in ("hits","suggestions","ids")):
return cached["hits"], cached["suggestions"], cached["ids"]
else:
CACHE.pop(term_lemma, None)
hits = []
suggestions = []
ids = []
# Exakte Treffer
if term_norm in norm_dict:
e = norm_dict[term_norm]
hits.append(e["Wort/Vokabel"])
ids.append(e["ID"])
elif term_lemma in norm_dict:
e = norm_dict[term_lemma]
hits.append(e["Wort/Vokabel"])
ids.append(e["ID"])
else:
# Fuzzy Matching
for key, e in norm_dict.items():
score = fuzz.token_sort_ratio(term_lemma, key)/100.0 if RAPIDFUZZ_AVAILABLE else SequenceMatcher(None, term_lemma, key).ratio()
if score >= 0.75:
suggestions.append(e["Wort/Vokabel"])
ids.append(e["ID"])
CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}
return hits, suggestions, ids
# =========================
# LibreOffice Dialog (ListBox + Checkbox)
# =========================
def apply_proposals_dialog():
ctx = uno.getComponentContext()
smgr = ctx.ServiceManager
desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
doc = desktop.getCurrentComponent()
if not doc.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
log("Kein Calc-Dokument aktiv")
return
selection = doc.CurrentSelection
sheet = doc.CurrentController.ActiveSheet
# Prüfen ob eine Zelle ausgewählt ist
if selection is None or not hasattr(selection, "getCellAddress"):
log("Keine Zelle ausgewählt")
return
cell = selection
# Spalte überprüfen
header_row = sheet.getCellRangeByPosition(0,0,sheet.Columns.Count-1,0)
objekt_col = None
norm_vorschlag_col = None
for col_idx in range(sheet.Columns.Count):
val = sheet.getCellByPosition(col_idx,0).String
if val.strip().lower() == "objektbeschreibung":
objekt_col = col_idx
elif val.strip().lower() == "norm_vorschlag":
norm_vorschlag_col = col_idx
if norm_vorschlag_col is None or objekt_col is None:
log("Spalte 'Norm_Vorschlag' oder 'Objektbeschreibung' nicht gefunden")
return
# Vorschläge auslesen
proposals_str = sheet.getCellByPosition(norm_vorschlag_col, cell.RangeAddress.StartRow).String
if not proposals_str.strip():
log("Keine Vorschläge in der ausgewählten Zelle")
return
proposals = [p.strip() for p in proposals_str.split(";") if p.strip()]
# Dialog erstellen
toolkit = smgr.createInstanceWithContext("com.sun.star.awt.Toolkit", ctx)
dialog_model = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialogModel", ctx)
dialog_model.Width = 180
dialog_model.Height = 150
dialog_model.Title = "Vorschläge übernehmen"
# ListBox
lb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlListBoxModel")
lb_model.Name = "ProposalList"
lb_model.PositionX = 10
lb_model.PositionY = 10
lb_model.Width = 160
lb_model.Height = 80
lb_model.StringItemList = tuple(proposals)
dialog_model.insertByName("ProposalList", lb_model)
# Checkbox
cb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlCheckBoxModel")
cb_model.Name = "AllCheck"
cb_model.PositionX = 10
cb_model.PositionY = 95
cb_model.Width = 160
cb_model.Height = 15
cb_model.Label = "Alle Vorschläge übernehmen"
dialog_model.insertByName("AllCheck", cb_model)
# OK-Button
btn_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
btn_model.Name = "OKButton"
btn_model.PositionX = 10
btn_model.PositionY = 115
btn_model.Width = 80
btn_model.Height = 20
btn_model.Label = "OK"
dialog_model.insertByName("OKButton", btn_model)
# Abbrechen-Button
cancel_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
cancel_model.Name = "CancelButton"
cancel_model.PositionX = 100
cancel_model.PositionY = 115
cancel_model.Width = 80
cancel_model.Height = 20
cancel_model.Label = "Abbrechen"
dialog_model.insertByName("CancelButton", cancel_model)
# Control Dialog
dialog = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialog", ctx)
dialog.setModel(dialog_model)
dialog.setVisible(True)
toolkit.createPeer(dialog, None)
# Wait for OK / Cancel (simple polling loop)
import time
while True:
    time.sleep(0.1)
    # check which button was clicked
if dialog.getControl("OKButton").Pressed:
all_flag = dialog.getControl("AllCheck").State == 1
selected_idx = dialog.getControl("ProposalList").SelectedItems
if selected_idx:
selected_proposal = proposals[selected_idx[0]]
else:
selected_proposal = None
break
elif dialog.getControl("CancelButton").Pressed:
dialog.endExecute()
return
# Anwenden
obj_cell = sheet.getCellByPosition(objekt_col, cell.RangeAddress.StartRow)
obj_text = obj_cell.String
if all_flag:
for prop in proposals:
idx = obj_text.lower().find(prop.lower())
if idx != -1:
obj_text = obj_text[:idx] + prop + obj_text[idx+len(prop):]
else:
if selected_proposal:
idx = obj_text.lower().find(selected_proposal.lower())
if idx != -1:
obj_text = obj_text[:idx] + selected_proposal + obj_text[idx+len(selected_proposal):]
obj_cell.String = obj_text
obj_cell.CellBackColor = 0x00FF00 # grün
dialog.endExecute()
save_cache()
log(f"Vorschlag übernommen: {obj_text}")
# =========================
# Automatische Button-Registrierung
# =========================
def register_toolbar_button():
ctx = uno.getComponentContext()
smgr = ctx.ServiceManager
desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
doc = desktop.getCurrentComponent()
frame = doc.CurrentController.Frame
# Button kann manuell über Makro-Menü an Toolbar gebunden werden
# Hier wird nur das Makro selbst registriert
# Symbolleiste muss in LO einmalig erstellt werden
# =========================
# Hauptmakro
# =========================
def run_mapper_macro():
try:
norm_dict = load_normvokabular(NV_MASTER_FILE)
log(f"NV_MASTER geladen ({len(norm_dict)} Begriffe)")
apply_proposals_dialog()
except Exception as e:
log(f"Fehler in run_mapper_macro: {e}")

297
mapper_macro_1.3.py Normal file
View File

@ -0,0 +1,297 @@
# -*- coding: utf-8 -*-
import os
import uno
import unohelper
import re
import json
import pandas as pd
from pathlib import Path
from difflib import SequenceMatcher
# RapidFuzz für Fuzzy-Suche
try:
from rapidfuzz import fuzz
RAPIDFUZZ_AVAILABLE = True
except ImportError:
RAPIDFUZZ_AVAILABLE = False
# Spacy Lemmatizer
try:
import spacy
nlp = spacy.load("de_core_news_sm")
SPACY_AVAILABLE = True
except Exception:  # spaCy or the German model may be missing
SPACY_AVAILABLE = False
nlp = None
# =========================
# Pfade & Config
# =========================
SCRIPT_DIR = Path("/home/jarnold/.config/libreoffice/4/user/Scripts/python/NV Abgleich Makro")
NV_MASTER_FILE = SCRIPT_DIR / "NV_MASTER.ods"
CACHE_FILE = SCRIPT_DIR / "mapper_cache.json"
LOG_FILE = SCRIPT_DIR / "mapper_log.txt"
STOPWORDS = {"mit","ohne","der","die","das","ein","eine","und","zu","von","im","in","auf","an","als","bei","für","aus","dem","den","des","eines","einer"}
# =========================
# Cache & Logging
# =========================
if CACHE_FILE.exists():
with open(CACHE_FILE,"r",encoding="utf-8") as f:
CACHE = json.load(f)
else:
CACHE = {}
def save_cache():
with open(CACHE_FILE,"w",encoding="utf-8") as f:
json.dump(CACHE, f, indent=2, ensure_ascii=False)
def log(msg):
with open(LOG_FILE,"a",encoding="utf-8") as f:
f.write(msg + "\n")
# =========================
# Textverarbeitung
# =========================
def normalize_text(s):
if not s: return ""
s = str(s).lower().strip()
s = re.sub(r"[\(\)\[\]\"'\\.,;:\?!]", "", s)
s = re.sub(r"\s+"," ",s)
return s
lemma_cache = {}
def lemmatize_term(term):
term_norm = normalize_text(term)
if term_norm in lemma_cache:
return lemma_cache[term_norm]
if SPACY_AVAILABLE and nlp:
doc = nlp(term_norm)
lemma = " ".join([token.lemma_ for token in doc])
else:
lemma = term_norm
lemma_cache[term_norm] = lemma
return lemma
def compound_split(term):
parts = re.findall(r'[A-ZÄÖÜa-zäöü]+', term)
return parts if parts else [term]
# =========================
# NV_MASTER laden
# =========================
def load_normvokabular(file_path):
sheets = pd.read_excel(file_path, sheet_name=None, engine="odf")
norm_dict = {}
for sheet_name, df in sheets.items():
df = df.dropna(how="all", axis=1)
df.columns = [str(c).strip() for c in df.columns]
if "ID" not in df.columns or "Wort/Vokabel" not in df.columns:
continue
current_parent_id = None
for _, row in df.iterrows():
row_id = str(row["ID"]).strip() if pd.notna(row["ID"]) else None
row_word = str(row["Wort/Vokabel"]).strip() if pd.notna(row["Wort/Vokabel"]) else None
if row_id: current_parent_id = row_id
if not row_word: continue
norm_dict[normalize_text(row_word)] = {
"ID": current_parent_id,
"Wort/Vokabel": row_word
}
return norm_dict
# =========================
# Mapping
# =========================
def map_term_with_indexes(term, norm_dict):
term_norm = normalize_text(term)
term_lemma = lemmatize_term(term_norm)
# Cache prüfen
if term_lemma in CACHE:
cached = CACHE[term_lemma]
if isinstance(cached, dict) and all(k in cached for k in ("hits","suggestions","ids")):
return cached["hits"], cached["suggestions"], cached["ids"]
else:
CACHE.pop(term_lemma, None)
hits = []
suggestions = []
ids = []
# Exakte Treffer
if term_norm in norm_dict:
e = norm_dict[term_norm]
hits.append(e["Wort/Vokabel"])
ids.append(e["ID"])
elif term_lemma in norm_dict:
e = norm_dict[term_lemma]
hits.append(e["Wort/Vokabel"])
ids.append(e["ID"])
else:
# Fuzzy Matching
for key, e in norm_dict.items():
score = fuzz.token_sort_ratio(term_lemma, key)/100.0 if RAPIDFUZZ_AVAILABLE else SequenceMatcher(None, term_lemma, key).ratio()
if score >= 0.75:
suggestions.append(e["Wort/Vokabel"])
ids.append(e["ID"])
CACHE[term_lemma] = {"hits": hits, "suggestions": suggestions, "ids": ids}
return hits, suggestions, ids
# =========================
# LibreOffice Dialog (ListBox + Checkbox)
# =========================
def apply_proposals_dialog():
ctx = uno.getComponentContext()
smgr = ctx.ServiceManager
desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
doc = desktop.getCurrentComponent()
if not doc.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
log("Kein Calc-Dokument aktiv")
return
selection = doc.CurrentSelection
sheet = doc.CurrentController.ActiveSheet
# Prüfen ob eine Zelle ausgewählt ist
if selection is None or not hasattr(selection, "getCellAddress"):
log("Keine Zelle ausgewählt")
return
cell = selection
# Spalte überprüfen
header_row = sheet.getCellRangeByPosition(0,0,sheet.Columns.Count-1,0)
objekt_col = None
norm_vorschlag_col = None
for col_idx in range(sheet.Columns.Count):
val = sheet.getCellByPosition(col_idx,0).String
if val.strip().lower() == "objektbeschreibung":
objekt_col = col_idx
elif val.strip().lower() == "norm_vorschlag":
norm_vorschlag_col = col_idx
if norm_vorschlag_col is None or objekt_col is None:
log("Spalte 'Norm_Vorschlag' oder 'Objektbeschreibung' nicht gefunden")
return
# Vorschläge auslesen
proposals_str = sheet.getCellByPosition(norm_vorschlag_col, cell.RangeAddress.StartRow).String
if not proposals_str.strip():
log("Keine Vorschläge in der ausgewählten Zelle")
return
proposals = [p.strip() for p in proposals_str.split(";") if p.strip()]
# Dialog erstellen
toolkit = smgr.createInstanceWithContext("com.sun.star.awt.Toolkit", ctx)
dialog_model = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialogModel", ctx)
dialog_model.Width = 180
dialog_model.Height = 150
dialog_model.Title = "Vorschläge übernehmen"
# ListBox
lb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlListBoxModel")
lb_model.Name = "ProposalList"
lb_model.PositionX = 10
lb_model.PositionY = 10
lb_model.Width = 160
lb_model.Height = 80
lb_model.StringItemList = tuple(proposals)
dialog_model.insertByName("ProposalList", lb_model)
# Checkbox
cb_model = dialog_model.createInstance("com.sun.star.awt.UnoControlCheckBoxModel")
cb_model.Name = "AllCheck"
cb_model.PositionX = 10
cb_model.PositionY = 95
cb_model.Width = 160
cb_model.Height = 15
cb_model.Label = "Alle Vorschläge übernehmen"
dialog_model.insertByName("AllCheck", cb_model)
# OK-Button
btn_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
btn_model.Name = "OKButton"
btn_model.PositionX = 10
btn_model.PositionY = 115
btn_model.Width = 80
btn_model.Height = 20
btn_model.Label = "OK"
dialog_model.insertByName("OKButton", btn_model)
# Abbrechen-Button
cancel_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
cancel_model.Name = "CancelButton"
cancel_model.PositionX = 100
cancel_model.PositionY = 115
cancel_model.Width = 80
cancel_model.Height = 20
cancel_model.Label = "Abbrechen"
dialog_model.insertByName("CancelButton", cancel_model)
# Control Dialog
dialog = smgr.createInstanceWithContext("com.sun.star.awt.UnoControlDialog", ctx)
dialog.setModel(dialog_model)
dialog.setVisible(True)
toolkit.createPeer(dialog, None)
# Wait for OK / Cancel (simple polling loop)
import time
while True:
    time.sleep(0.1)
    # check which button was clicked
if dialog.getControl("OKButton").Pressed:
all_flag = dialog.getControl("AllCheck").State == 1
selected_idx = dialog.getControl("ProposalList").SelectedItems
if selected_idx:
selected_proposal = proposals[selected_idx[0]]
else:
selected_proposal = None
break
elif dialog.getControl("CancelButton").Pressed:
dialog.endExecute()
return
# Anwenden
obj_cell = sheet.getCellByPosition(objekt_col, cell.RangeAddress.StartRow)
obj_text = obj_cell.String
if all_flag:
for prop in proposals:
idx = obj_text.lower().find(prop.lower())
if idx != -1:
obj_text = obj_text[:idx] + prop + obj_text[idx+len(prop):]
else:
if selected_proposal:
idx = obj_text.lower().find(selected_proposal.lower())
if idx != -1:
obj_text = obj_text[:idx] + selected_proposal + obj_text[idx+len(selected_proposal):]
obj_cell.String = obj_text
obj_cell.CellBackColor = 0x00FF00 # grün
dialog.endExecute()
save_cache()
log(f"Vorschlag übernommen: {obj_text}")
# =========================
# Automatische Button-Registrierung
# =========================
def register_toolbar_button():
ctx = uno.getComponentContext()
smgr = ctx.ServiceManager
desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
doc = desktop.getCurrentComponent()
frame = doc.CurrentController.Frame
# Button kann manuell über Makro-Menü an Toolbar gebunden werden
# Hier wird nur das Makro selbst registriert
# Symbolleiste muss in LO einmalig erstellt werden
# =========================
# Hauptmakro
# =========================
def run_mapper_macro():
try:
norm_dict = load_normvokabular(NV_MASTER_FILE)
log(f"NV_MASTER geladen ({len(norm_dict)} Begriffe)")
apply_proposals_dialog()
except Exception as e:
log(f"Fehler in run_mapper_macro: {e}")

121
normmapper_macro.py Normal file
View File

@ -0,0 +1,121 @@
import uno
import json
import subprocess
from pathlib import Path
from com.sun.star.awt import XActionListener
# Farbwerte (BGR)
GREEN = 0xC6EFCE
RED = 0xFFC7CE
YELLOW = 0xFFEB9C
def get_objektbeschreibung_column(sheet, max_header_rows=5):
    """Finds the 'Objektbeschreibung' column in the first few rows."""
    # Scanning every row of the sheet would touch millions of cells;
    # the header is expected near the top.
    for row in range(max_header_rows):
        for col in range(sheet.Columns.Count):
            cell = sheet.getCellByPosition(col, row)
            if cell.String.strip().lower() == "objektbeschreibung":
                return col
    return None
def update_cell_color(cell, status):
"""Färbt die Zelle."""
if status == "grün":
cell.CellBackColor = GREEN
elif status == "gelb":
cell.CellBackColor = YELLOW
else:
cell.CellBackColor = RED
def call_mapper(term):
"""Ruft den lokalen Wrapper auf."""
wrapper = Path("/home/jarnold/projects/GND-Skript Test/NormVokabular_Mapper_Wrapper.py")
if not wrapper.exists():
return {"term": term, "norm_name": "KEIN TREFFER", "norm_id": "", "suggestions": []}
result = subprocess.run(
["python3", str(wrapper), term],
capture_output=True,
text=True
)
try:
output = json.loads(result.stdout)
except Exception:
output = {"term": term, "norm_name": "KEIN TREFFER", "norm_id": "", "suggestions": []}
return output
class SuggestionListener(XActionListener):
"""Listener für Klick auf Vorschlag-Button."""
def __init__(self, cell, suggestion, dialog):
self.cell = cell
self.suggestion = suggestion
self.dialog = dialog
def actionPerformed(self, event):
self.cell.String = self.suggestion
update_cell_color(self.cell, "grün")
self.dialog.endExecute() # schließt das Dialogfenster
def disposing(self, event):
pass
def show_suggestion_dialog(cell, term, suggestions):
"""Zeigt ein Dialog-Fenster mit klickbaren Vorschlägen."""
ctx = XSCRIPTCONTEXT.getComponentContext()
smgr = ctx.getServiceManager()
toolkit = smgr.createInstance("com.sun.star.awt.Toolkit")
dialog_model = smgr.createInstance("com.sun.star.awt.UnoControlDialogModel")
dialog_model.PositionX = 100
dialog_model.PositionY = 100
dialog_model.Width = 200
dialog_model.Height = 30 + 25*len(suggestions)
dialog_model.Title = f"Vorschläge für '{term}'"
for i, sugg in enumerate(suggestions[:3]):
btn_model = dialog_model.createInstance("com.sun.star.awt.UnoControlButtonModel")
btn_model.Name = f"btn_{i}"
btn_model.Label = sugg
btn_model.PositionX = 10
btn_model.PositionY = 10 + i*25
btn_model.Width = 180
btn_model.Height = 20
dialog_model.insertByName(btn_model.Name, btn_model)
dialog = smgr.createInstance("com.sun.star.awt.UnoControlDialog")
dialog.setModel(dialog_model)
dialog.setVisible(True)
for i, sugg in enumerate(suggestions[:3]):
btn = dialog.getControl(f"btn_{i}")
listener = SuggestionListener(cell, sugg, dialog)
btn.addActionListener(listener)
dialog.createPeer(toolkit, None)
dialog.execute()
def mapper_process_column():
"""Verarbeitet alle Zellen unter 'Objektbeschreibung' in der aktiven Tabelle."""
doc = XSCRIPTCONTEXT.getDocument()
sheet = doc.CurrentController.ActiveSheet
col_index = get_objektbeschreibung_column(sheet)
if col_index is None:
return
# Walk only the used area instead of every row of the sheet.
cursor = sheet.createCursor()
cursor.gotoStartOfUsedArea(False)
cursor.gotoEndOfUsedArea(True)
last_row = cursor.getRangeAddress().EndRow
for row in range(last_row + 1):
    cell = sheet.getCellByPosition(col_index, row)
    term = cell.String.strip()
    if not term or term.lower() == "objektbeschreibung":
        continue  # skip empty cells and the header cell itself
result = call_mapper(term)
if result["norm_name"] != "KEIN TREFFER":
cell.String = result["norm_name"]
update_cell_color(cell, "grün")
elif result["suggestions"]:
update_cell_color(cell, "gelb")
show_suggestion_dialog(cell, term, result["suggestions"])
else:
update_cell_color(cell, "rot")
show_suggestion_dialog(cell, term, [])
# Export
g_exportedScripts = mapper_process_column,
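# --- Interface note (illustrative, not part of the original macro) ---
# call_mapper() expects the wrapper script to print a single JSON object on
# stdout with the keys "term", "norm_name", "norm_id" and "suggestions".
# A minimal wrapper satisfying that contract could look like the sketch
# below; the real NormVokabular_Mapper_Wrapper.py may do considerably more.
#
#   import json, sys
#   term = sys.argv[1] if len(sys.argv) > 1 else ""
#   print(json.dumps({"term": term, "norm_name": "KEIN TREFFER",
#                     "norm_id": "", "suggestions": []}, ensure_ascii=False))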

0
test.py Normal file
View File

247
venv/bin/Activate.ps1 Normal file
View File

@ -0,0 +1,247 @@
<#
.Synopsis
Activate a Python virtual environment for the current PowerShell session.
.Description
Pushes the python executable for a virtual environment to the front of the
$Env:PATH environment variable and sets the prompt to signify that you are
in a Python virtual environment. Makes use of the command line switches as
well as the `pyvenv.cfg` file values present in the virtual environment.
.Parameter VenvDir
Path to the directory that contains the virtual environment to activate. The
default value for this is the parent of the directory that the Activate.ps1
script is located within.
.Parameter Prompt
The prompt prefix to display when this virtual environment is activated. By
default, this prompt is the name of the virtual environment folder (VenvDir)
surrounded by parentheses and followed by a single space (ie. '(.venv) ').
.Example
Activate.ps1
Activates the Python virtual environment that contains the Activate.ps1 script.
.Example
Activate.ps1 -Verbose
Activates the Python virtual environment that contains the Activate.ps1 script,
and shows extra information about the activation as it executes.
.Example
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
Activates the Python virtual environment located in the specified location.
.Example
Activate.ps1 -Prompt "MyPython"
Activates the Python virtual environment that contains the Activate.ps1 script,
and prefixes the current prompt with the specified string (surrounded in
parentheses) while the virtual environment is active.
.Notes
On Windows, it may be required to enable this Activate.ps1 script by setting the
execution policy for the user. You can do this by issuing the following PowerShell
command:
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
For more information on Execution Policies:
https://go.microsoft.com/fwlink/?LinkID=135170
#>
Param(
[Parameter(Mandatory = $false)]
[String]
$VenvDir,
[Parameter(Mandatory = $false)]
[String]
$Prompt
)
<# Function declarations --------------------------------------------------- #>
<#
.Synopsis
Remove all shell session elements added by the Activate script, including the
addition of the virtual environment's Python executable from the beginning of
the PATH variable.
.Parameter NonDestructive
If present, do not remove this function from the global namespace for the
session.
#>
function global:deactivate ([switch]$NonDestructive) {
# Revert to original values
# The prior prompt:
if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
}
# The prior PYTHONHOME:
if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
}
# The prior PATH:
if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
Remove-Item -Path Env:_OLD_VIRTUAL_PATH
}
# Just remove the VIRTUAL_ENV altogether:
if (Test-Path -Path Env:VIRTUAL_ENV) {
Remove-Item -Path env:VIRTUAL_ENV
}
# Just remove VIRTUAL_ENV_PROMPT altogether.
if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
Remove-Item -Path env:VIRTUAL_ENV_PROMPT
}
# Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
}
# Leave deactivate function in the global namespace if requested:
if (-not $NonDestructive) {
Remove-Item -Path function:deactivate
}
}
<#
.Description
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
given folder, and returns them in a map.
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
two strings separated by `=` (with any amount of whitespace surrounding the =)
then it is considered a `key = value` line. The left hand string is the key,
the right hand is the value.
If the value starts with a `'` or a `"` then the first and last character is
stripped from the value before being captured.
.Parameter ConfigDir
Path to the directory that contains the `pyvenv.cfg` file.
#>
function Get-PyVenvConfig(
[String]
$ConfigDir
) {
Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
# Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
$pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
# An empty map will be returned if no config file is found.
$pyvenvConfig = @{ }
if ($pyvenvConfigPath) {
Write-Verbose "File exists, parse `key = value` lines"
$pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
$pyvenvConfigContent | ForEach-Object {
$keyval = $PSItem -split "\s*=\s*", 2
if ($keyval[0] -and $keyval[1]) {
$val = $keyval[1]
# Remove extraneous quotations around a string value.
if ("'""".Contains($val.Substring(0, 1))) {
$val = $val.Substring(1, $val.Length - 2)
}
$pyvenvConfig[$keyval[0]] = $val
Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
}
}
}
return $pyvenvConfig
}
<# Begin Activate script --------------------------------------------------- #>
# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath
Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
$VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
Write-Verbose "VenvDir=$VenvDir"
}
# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
$Prompt = $pyvenvCfg['prompt'];
}
else {
Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
$Prompt = Split-Path -Path $venvDir -Leaf
}
}
Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"
# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive
# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir
if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
Write-Verbose "Setting prompt to '$Prompt'"
# Set the prompt to include the env name
# Make sure _OLD_VIRTUAL_PROMPT is global
function global:_OLD_VIRTUAL_PROMPT { "" }
Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
function global:prompt {
Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
_OLD_VIRTUAL_PROMPT
}
$env:VIRTUAL_ENV_PROMPT = $Prompt
}
# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
Remove-Item -Path Env:PYTHONHOME
}
# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"

69
venv/bin/activate Normal file
View File

@ -0,0 +1,69 @@
# This file must be used with "source bin/activate" *from bash*
# you cannot run it directly
deactivate () {
# reset old environment variables
if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
PATH="${_OLD_VIRTUAL_PATH:-}"
export PATH
unset _OLD_VIRTUAL_PATH
fi
if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
export PYTHONHOME
unset _OLD_VIRTUAL_PYTHONHOME
fi
# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
hash -r 2> /dev/null
fi
if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
PS1="${_OLD_VIRTUAL_PS1:-}"
export PS1
unset _OLD_VIRTUAL_PS1
fi
unset VIRTUAL_ENV
unset VIRTUAL_ENV_PROMPT
if [ ! "${1:-}" = "nondestructive" ] ; then
# Self destruct!
unset -f deactivate
fi
}
# unset irrelevant variables
deactivate nondestructive
VIRTUAL_ENV='/home/jarnold/projects/GND-Skript Test/venv'
export VIRTUAL_ENV
_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/"bin":$PATH"
export PATH
# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
_OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
unset PYTHONHOME
fi
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
_OLD_VIRTUAL_PS1="${PS1:-}"
PS1='(venv) '"${PS1:-}"
export PS1
VIRTUAL_ENV_PROMPT='(venv) '
export VIRTUAL_ENV_PROMPT
fi
# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
hash -r 2> /dev/null
fi

26
venv/bin/activate.csh Normal file
View File

@ -0,0 +1,26 @@
# This file must be used with "source bin/activate.csh" *from csh*.
# You cannot run it directly.
# Created by Davide Di Blasi <davidedb@gmail.com>.
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
# Unset irrelevant variables.
deactivate nondestructive
setenv VIRTUAL_ENV '/home/jarnold/projects/GND-Skript Test/venv'
set _OLD_VIRTUAL_PATH="$PATH"
setenv PATH "$VIRTUAL_ENV/"bin":$PATH"
set _OLD_VIRTUAL_PROMPT="$prompt"
if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
set prompt = '(venv) '"$prompt"
setenv VIRTUAL_ENV_PROMPT '(venv) '
endif
alias pydoc python -m pydoc
rehash

69
venv/bin/activate.fish Normal file
View File

@ -0,0 +1,69 @@
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
# (https://fishshell.com/); you cannot run it directly.
function deactivate -d "Exit virtual environment and return to normal shell environment"
# reset old environment variables
if test -n "$_OLD_VIRTUAL_PATH"
set -gx PATH $_OLD_VIRTUAL_PATH
set -e _OLD_VIRTUAL_PATH
end
if test -n "$_OLD_VIRTUAL_PYTHONHOME"
set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
set -e _OLD_VIRTUAL_PYTHONHOME
end
if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
set -e _OLD_FISH_PROMPT_OVERRIDE
# prevents error when using nested fish instances (Issue #93858)
if functions -q _old_fish_prompt
functions -e fish_prompt
functions -c _old_fish_prompt fish_prompt
functions -e _old_fish_prompt
end
end
set -e VIRTUAL_ENV
set -e VIRTUAL_ENV_PROMPT
if test "$argv[1]" != "nondestructive"
# Self-destruct!
functions -e deactivate
end
end
# Unset irrelevant variables.
deactivate nondestructive
set -gx VIRTUAL_ENV '/home/jarnold/projects/GND-Skript Test/venv'
set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/"bin $PATH
# Unset PYTHONHOME if set.
if set -q PYTHONHOME
set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
set -e PYTHONHOME
end
if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
# fish uses a function instead of an env var to generate the prompt.
# Save the current fish_prompt function as the function _old_fish_prompt.
functions -c fish_prompt _old_fish_prompt
# With the original prompt function renamed, we can override with our own.
function fish_prompt
# Save the return status of the last command.
set -l old_status $status
# Output the venv prompt; color taken from the blue of the Python logo.
printf "%s%s%s" (set_color 4B8BBE) '(venv) ' (set_color normal)
# Restore the return status of the previous command.
echo "exit $old_status" | .
# Output the original/"old" prompt.
_old_fish_prompt
end
set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
set -gx VIRTUAL_ENV_PROMPT '(venv) '
end

229
venv/bin/csv2ods Executable file
View File

@ -0,0 +1,229 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2008 Agustin Henze -> agustinhenze at gmail.com
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
# Søren Roug
#
# Oct 2014: Georges Khaznadar <georgesk@debian.org>
# - ported to Python3
# - imlemented the missing switch -c / --encoding, with an extra
# feature for POSIX platforms which can guess encoding.
from odf.opendocument import OpenDocumentSpreadsheet
from odf.style import Style, TextProperties, ParagraphProperties, TableColumnProperties
from odf.text import P
from odf.table import Table, TableColumn, TableRow, TableCell
from optparse import OptionParser
import sys,csv,re, os, codecs
if sys.version_info[0]==3: unicode=str
if sys.version_info[0]==2:
class UTF8Recoder:
"""
Iterator that reads an encoded stream and reencodes the input to UTF-8
"""
def __init__(self, f, encoding):
self.reader = codecs.getreader(encoding)(f)
def __iter__(self):
return self
def next(self):
return self.reader.next().encode("utf-8")
class UnicodeReader:
"""
A CSV reader which will iterate over lines in the CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
f = UTF8Recoder(f, encoding)
self.reader = csv.reader(f, dialect=dialect, **kwds)
def next(self):
row = self.reader.next()
return [unicode(s, "utf-8") for s in row]
def __iter__(self):
return self
def csvToOds( pathFileCSV, pathFileODS, tableName='table',
delimiter=',', quoting=csv.QUOTE_MINIMAL,
quotechar = '"', escapechar = None,
skipinitialspace = False, lineterminator = '\r\n',
encoding="utf-8"):
textdoc = OpenDocumentSpreadsheet()
# Create a style for the table content. One we can modify
# later in the word processor.
tablecontents = Style(name="Table Contents", family="paragraph")
tablecontents.addElement(ParagraphProperties(numberlines="false", linenumber="0"))
tablecontents.addElement(TextProperties(fontweight="bold"))
textdoc.styles.addElement(tablecontents)
# Start the table
table = Table( name=tableName )
if sys.version_info[0]==3:
reader = csv.reader(open(pathFileCSV, encoding=encoding),
delimiter=delimiter,
quoting=quoting,
quotechar=quotechar,
escapechar=escapechar,
skipinitialspace=skipinitialspace,
lineterminator=lineterminator)
else:
reader = UnicodeReader(open(pathFileCSV),
encoding=encoding,
delimiter=delimiter,
quoting=quoting,
quotechar=quotechar,
escapechar=escapechar,
skipinitialspace=skipinitialspace,
lineterminator=lineterminator)
fltExp = re.compile(r'^\s*[-+]?\d+(\.\d+)?\s*$')
for row in reader:
tr = TableRow()
table.addElement(tr)
for val in row:
if fltExp.match(val):
tc = TableCell(valuetype="float", value=val.strip())
else:
tc = TableCell(valuetype="string")
tr.addElement(tc)
p = P(stylename=tablecontents,text=val)
tc.addElement(p)
textdoc.spreadsheet.addElement(table)
textdoc.save( pathFileODS )
if __name__ == "__main__":
usage = "%prog -i file.csv -o file.ods -d"
parser = OptionParser(usage=usage, version="%prog 0.1")
parser.add_option('-i','--input', action='store',
dest='input', help='File input in csv')
parser.add_option('-o','--output', action='store',
dest='output', help='File output in ods')
parser.add_option('-d','--delimiter', action='store',
dest='delimiter', help='specifies a one-character string to use as the field separator. It defaults to ",".')
parser.add_option('-c','--encoding', action='store',
dest='encoding', help='specifies the encoding of the csv file. It defaults to utf-8')
parser.add_option('-t','--table', action='store',
dest='tableName', help='The table name in the output file')
parser.add_option('-s','--skipinitialspace',
dest='skipinitialspace', help='''specifies how to interpret whitespace which
immediately follows a delimiter. It defaults to False, which
means that whitespace immediately following a delimiter is part
of the following field.''')
parser.add_option('-l','--lineterminator', action='store',
dest='lineterminator', help='''specifies the character sequence which should
terminate rows.''')
parser.add_option('-q','--quoting', action='store',
dest='quoting', help='''It can take on any of the following module constants:
0 = QUOTE_MINIMAL means only when required, for example, when a field contains either the quotechar or the delimiter
1 = QUOTE_ALL means that quotes are always placed around fields.
2 = QUOTE_NONNUMERIC means that quotes are always placed around fields which do not parse as integers or floating point numbers.
3 = QUOTE_NONE means that quotes are never placed around fields.
It defaults to QUOTE_MINIMAL''')
parser.add_option('-e','--escapechar', action='store',
dest='escapechar', help='''specifies a one-character string used to escape the delimiter when quoting is set to QUOTE_NONE.''')
parser.add_option('-r','--quotechar', action='store',
dest='quotechar', help='''specifies a one-character string to use as the quoting character. It defaults to '"'.''')
(options, args) = parser.parse_args()
if options.input:
pathFileCSV = options.input
else:
parser.print_help()
exit( 0 )
if options.output:
pathFileODS = options.output
else:
parser.print_help()
exit( 0 )
if options.delimiter:
delimiter = options.delimiter
else:
delimiter = ","
if options.skipinitialspace:
skipinitialspace = True
else:
skipinitialspace=False
if options.lineterminator:
lineterminator = options.lineterminator
else:
lineterminator ="\r\n"
if options.escapechar:
escapechar = options.escapechar
else:
escapechar=None
if options.tableName:
tableName = options.tableName
else:
tableName = "table"
if options.quotechar:
quotechar = options.quotechar
else:
quotechar = "\""
encoding = "utf-8" # default setting
###########################################################
## try to guess the encoding; this is implemented only with
## POSIX platforms. Can it be improved?
output = os.popen('/usr/bin/file ' + pathFileCSV).read()
m=re.match(r'^.*: ([-a-zA-Z0-9]+) text$', output)
if m:
encoding=m.group(1)
if 'ISO-8859' in encoding:
encoding="latin-1"
else:
encoding="utf-8"
############################################################
# when the -c or --coding switch is used, it takes precedence
if options.encoding:
encoding = options.encoding
csvToOds( pathFileCSV=unicode(pathFileCSV),
pathFileODS=unicode(pathFileODS),
delimiter=delimiter, skipinitialspace=skipinitialspace,
escapechar=escapechar,
lineterminator=unicode(lineterminator),
tableName=tableName, quotechar=quotechar,
encoding=encoding)
# Local Variables: ***
# mode: python ***
# End: ***

10
venv/bin/csv2rdf Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from rdflib.tools.csv2rdf import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/f2py Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from numpy.f2py.f2py2e import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

95
venv/bin/mailodf Executable file
View File

@ -0,0 +1,95 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2006 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
from odf.odf2xhtml import ODF2XHTML
import zipfile
import sys, os, smtplib, getopt
from email.mime.multipart import MIMEMultipart
from email.mime.nonmultipart import MIMENonMultipart
from email.mime.text import MIMEText
from email.encoders import encode_base64
if sys.version_info[0]==3: unicode=str
def usage():
sys.stderr.write("Usage: %s [-f from] [-s subject] inputfile recipients...\n" % sys.argv[0])
try:
opts, args = getopt.getopt(sys.argv[1:], "f:s:", ["from=", "subject="])
except getopt.GetoptError:
usage()
sys.exit(2)
fromaddr = os.getlogin() + "@" + os.getenv('HOSTNAME','localhost')
subject = None
for o, a in opts:
if o in ("-f", "--from"):
fromaddr = a
if o in ("-s", "--subject"):
subject = a
if len(args) < 2:
usage()
sys.exit(2)
suffices = {
'wmf':('image','x-wmf'),
'png':('image','png'),
'gif':('image','gif'),
'jpg':('image','jpeg'),
'jpeg':('image','jpeg')
}
msg = MIMEMultipart('related',type="text/html")
msg['From'] = fromaddr
# msg['Date'] = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
msg['To'] = ','.join(args[1:])
msg.preamble = 'This is a multi-part message in MIME format.'
msg.epilogue = ''
odhandler = ODF2XHTML()
result = odhandler.odf2xhtml(unicode(args[0]))
if subject:
msg['Subject'] = subject
else:
msg['Subject'] = odhandler.title
htmlpart = MIMEText(result,'html','us-ascii')
htmlpart['Content-Location'] = 'index.html'
msg.attach(htmlpart)
z = zipfile.ZipFile(unicode(args[0]))
for file in z.namelist():
if file[0:9] == 'Pictures/':
suffix = file[file.rfind(".")+1:]
main,sub = suffices.get(suffix,('application','octet-stream'))
img = MIMENonMultipart(main,sub)
img.set_payload(z.read(file))
img['Content-Location'] = "" + file
encode_base64(img)
msg.attach(img)
z.close()
server = smtplib.SMTP('localhost')
#server.set_debuglevel(1)
server.sendmail(fromaddr, args[1:], msg.as_string())
server.quit()
# Local Variables: ***
# mode: python ***
# End: ***

10
venv/bin/markdown-it Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from markdown_it.cli.parse import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/nltk Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from nltk.cli import cli
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(cli())

10
venv/bin/normalizer Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from charset_normalizer.cli import cli_detect
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(cli_detect())

10
venv/bin/numpy-config Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from numpy._configtool import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

72
venv/bin/odf2mht Executable file
View File

@ -0,0 +1,72 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2006 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
from __future__ import print_function
from odf.odf2xhtml import ODF2XHTML
import zipfile
import sys
#from time import gmtime, strftime
from email.mime.multipart import MIMEMultipart
from email.mime.nonmultipart import MIMENonMultipart
from email.mime.text import MIMEText
from email import encoders
if sys.version_info[0]==3: unicode=str
if len(sys.argv) != 2:
sys.stderr.write("Usage: %s inputfile\n" % sys.argv[0])
sys.exit(1)
suffices = {
'wmf':('image','x-wmf'),
'png':('image','png'),
'gif':('image','gif'),
'jpg':('image','jpeg'),
'jpeg':('image','jpeg')
}
msg = MIMEMultipart('related',type="text/html")
# msg['Subject'] = 'Subject here'
# msg['From'] = '<Saved by ODT2MHT>'
# msg['Date'] = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
msg.preamble = 'This is a multi-part message in MIME format.'
msg.epilogue = ''
odhandler = ODF2XHTML()
result = odhandler.odf2xhtml(unicode(sys.argv[1]))
htmlpart = MIMEText(result,'html','us-ascii')
htmlpart['Content-Location'] = 'index.html'
msg.attach(htmlpart)
z = zipfile.ZipFile(sys.argv[1])
for file in z.namelist():
if file[0:9] == 'Pictures/':
suffix = file[file.rfind(".")+1:]
main,sub = suffices.get(suffix,('application','octet-stream'))
img = MIMENonMultipart(main,sub)
img.set_payload(z.read(file))
img['Content-Location'] = "" + file
encoders.encode_base64(img)
msg.attach(img)
z.close()
print (msg.as_string())
# Local Variables: ***
# mode: python ***
# End: ***

59
venv/bin/odf2xhtml Executable file
View File

@ -0,0 +1,59 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2007 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
from odf.odf2xhtml import ODF2XHTML
import sys, getopt
if sys.version_info[0]==3: unicode=str
from io import StringIO
def usage():
sys.stderr.write("Usage: %s [-p] inputfile\n" % sys.argv[0])
try:
opts, args = getopt.getopt(sys.argv[1:], "ep", ["plain","embedable"])
except getopt.GetoptError:
usage()
sys.exit(2)
generatecss = True
embedable = False
for o, a in opts:
if o in ("-p", "--plain"):
generatecss = False
if o in ("-e", "--embedable"):
embedable = True
if len(args) != 1:
usage()
sys.exit(2)
odhandler = ODF2XHTML(generatecss, embedable)
try:
result = odhandler.odf2xhtml(unicode(args[0]))
except:
sys.stderr.write("Unable to open file %s or file is not OpenDocument\n" % args[0])
sys.exit(1)
sys.stdout.write(result)
# Local Variables: ***
# mode: python ***
# End: ***

81
venv/bin/odf2xml Executable file
View File

@ -0,0 +1,81 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2008 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
#
# OpenDocument can be a complete office document in a single
# XML document. This script will create such a document.
import sys, getopt, base64
from odf.opendocument import load
from odf.draw import Image, ObjectOle
from odf.style import BackgroundImage
from odf.text import ListLevelStyleImage
from odf.office import BinaryData
if sys.version_info[0]==3: unicode=str
def usage():
sys.stderr.write("Usage: %s [-e] [-o outputfile] [inputfile]\n" % sys.argv[0])
if __name__ == "__main__":
embedimage = False
try:
opts, args = getopt.getopt(sys.argv[1:], "o:e", ["output="])
except getopt.GetoptError:
usage()
sys.exit(2)
outputfile = '-'
for o, a in opts:
if o in ("-o", "--output"):
outputfile = a
if o == '-e':
embedimage = True
if len(args) > 1:
usage()
sys.exit(2)
if len(args) == 0:
d = load(sys.stdin)
else:
d = load(unicode(args[0]))
if embedimage:
images = d.getElementsByType(Image) + \
d.getElementsByType(BackgroundImage) + \
d.getElementsByType(ObjectOle) + \
d.getElementsByType(ListLevelStyleImage)
for image in images:
href = image.getAttribute('href')
if href and href[:9] == "Pictures/":
p = d.Pictures[href]
bp = base64.encodebytes(p[1])
image.addElement(BinaryData(text=bp))
image.removeAttribute('href')
xml = d.xml()
if outputfile == '-':
print (xml)
else:
open(outputfile,"wb").write(xml)
# Local Variables: ***
# mode: python ***
# End: ***

190
venv/bin/odfimgimport Executable file
View File

@ -0,0 +1,190 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2007-2009 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
from __future__ import print_function
import zipfile, sys, getopt, mimetypes
try:
from urllib2 import urlopen, quote, unquote
except ImportError:
from urllib.request import urlopen, quote, unquote
try:
from urlparse import urlunsplit, urlsplit
except ImportError:
from urllib.parse import urlunsplit, urlsplit
from odf.opendocument import load
from odf.draw import Image
if sys.version_info[0]==3: unicode=str
#sys.tracebacklimit = 0
# Variable to count the number of retrieval failures
failures = 0
# Set to one if quiet behaviour is wanted
quiet = 0
# If set will write every url to import
verbose = 0
# Dictionary with new pictures. Key is original file path
# Item is newfilename
newpictures = {}
doc = None
def importpicture(href):
""" Add the picture to the ZIP file
Returns the new path name to the file in the zip archive
If it is unable to import, then it returns the original href
Sideeffect: add line to manifest
"""
global doc, newpictures, failures, verbose
# Check that it is not already in the manifest
if href in doc.Pictures: return href
image = None
if verbose: print ("Importing", href, file=sys.stderr)
if href[:7] == "http://" or href[:8] == "https://" or href[:6] == "ftp://":
# There is a bug in urlopen: It can't open urls with non-ascii unicode
# characters. Convert to UTF-8 and then use percent encoding
try:
goodhref = href.encode('ascii')
except:
o = list(urlsplit(href))
o[2] = quote(o[2].encode('utf-8'))
goodhref = urlunsplit(o)
if goodhref in newpictures:
if verbose: print ("already imported", file=sys.stderr)
return newpictures[goodhref] # Already imported
try:
f = urlopen(goodhref.decode("utf-8"))
image = f.read()
headers = f.info()
f.close()
# Get the mimetype from the headerlines
c_t = headers['Content-Type'].split(';')[0].strip()
if c_t: mediatype = c_t.split(';')[0].strip()
if verbose: print ("OK", file=sys.stderr)
except:
failures += 1
if verbose: print ("failed", file=sys.stderr)
return href
# Remove query string
try: href= href[:href.rindex('?')]
except: pass
try:
lastslash = href[href.rindex('/'):]
ext = lastslash[lastslash.rindex('.'):]
except: ext = mimetypes.guess_extension(mediatype)
# Everything is a simple path.
else:
goodhref = href
if href[:3] == '../':
if directory is None:
goodhref = unquote(href[3:])
else:
goodhref = unquote(directory + href[2:])
if goodhref in newpictures:
if verbose: print ("already imported", file=sys.stderr)
return newpictures[goodhref] # Already imported
mediatype, encoding = mimetypes.guess_type(goodhref)
if mediatype is None:
mediatype = ''
try: ext = goodhref[goodhref.rindex('.'):]
except: ext=''
else:
ext = mimetypes.guess_extension(mediatype)
try:
image = open(goodhref, 'rb').read()
if verbose: print ("OK", file=sys.stderr)
except:
failures += 1
if verbose: print ("failed", file=sys.stderr)
return href
# If we have a picture to import, the image variable contains it
# and manifestfn, ext and mediatype has a value
if image:
manifestfn = doc.addPictureFromString(image, unicode(mediatype))
newpictures[goodhref] = manifestfn
return manifestfn
if verbose: print ("not imported", file=sys.stderr)
return href
def exitwithusage(exitcode=2):
""" Print out usage information and exit """
print ("Usage: %s [-q] [-v] [-o output] [inputfile]" % sys.argv[0], file=sys.stderr)
print ("\tInputfile must be OpenDocument format", file=sys.stderr)
sys.exit(exitcode)
outputfile = None
writefile = True
try:
opts, args = getopt.getopt(sys.argv[1:], "qvo:")
except getopt.GetoptError:
exitwithusage()
for o, a in opts:
if o == "-o":
outputfile = a
writefile = True
if o == "-q":
quiet = 1
if o == "-v":
verbose = 1
if len(args) == 0:
try:
doc = load(sys.stdin)
directory = None
except:
print ("Couldn't open OpenDocument file", file=sys.stderr)
exitwithusage()
else:
fn = unicode(args[0])
if not zipfile.is_zipfile(fn):
exitwithusage()
dirinx = max(fn.rfind('\\'), fn.rfind('/'))
if dirinx >= 0: directory = fn[:dirinx]
else: directory = "."
doc = load(fn)
for image in doc.getElementsByType(Image):
href = image.getAttribute('href')
newhref = importpicture(href)
image.setAttribute('href',newhref)
if writefile:
if outputfile is None:
doc.save(fn)
else:
doc.save(unicode(outputfile))
if quiet == 0 and failures > 0:
print ("Couldn't import %d image(s)" % failures, file=sys.stderr)
sys.exit( int(failures > 0) )
# Local Variables: ***
# mode: python ***
# End: ***

216
venv/bin/odflint Executable file
View File

@ -0,0 +1,216 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2009 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
import zipfile
from xml.sax import make_parser,handler
from xml.sax.xmlreader import InputSource
import xml.sax.saxutils
import sys
from odf.opendocument import OpenDocument
from odf import element, grammar
from odf.namespaces import *
from odf.attrconverters import attrconverters, cnv_string
from io import BytesIO
if sys.version_info[0]==3: unicode=str
extension_attributes = {
"OpenOffice.org" : {
(METANS,u'template'): (
(XLINKNS,u'role'),
),
(STYLENS,u'graphic-properties'): (
(STYLENS,u'background-transparency'),
),
(STYLENS,u'paragraph-properties'): (
(TEXTNS,u'enable-numbering'),
(STYLENS,u'join-border'),
),
(STYLENS,u'table-cell-properties'): (
(STYLENS,u'writing-mode'),
),
(STYLENS,u'table-row-properties'): (
(STYLENS,u'keep-together'),
),
},
"KOffice" : {
(STYLENS,u'graphic-properties'): (
(KOFFICENS,u'frame-behavior-on-new-page'),
),
(DRAWNS,u'page'): (
(KOFFICENS,u'name'),
),
(PRESENTATIONNS,u'show-shape'): (
(KOFFICENS,u'order-id'),
),
(PRESENTATIONNS,u'hide-shape'): (
(KOFFICENS,u'order-id'),
),
(CHARTNS,u'legend'): (
(KOFFICENS,u'title'),
),
}
}
printed_errors = []
def print_error(str):
if str not in printed_errors:
printed_errors.append(str)
print (str)
def chop_arg(arg):
if len(arg) > 20:
return "%s..." % arg[0:20]
return arg
def make_qname(tag):
return "%s:%s" % (nsdict.get(tag[0],tag[0]), tag[1])
def allowed_attributes(tag):
return grammar.allowed_attributes.get(tag)
class ODFElementHandler(handler.ContentHandler):
""" Extract headings from content.xml of an ODT file """
def __init__(self, document):
self.doc = document
self.tagstack = []
self.data = []
self.currtag = None
def characters(self, data):
self.data.append(data)
def startElementNS(self, tag, qname, attrs):
""" Pseudo-create an element
"""
allowed_attrs = grammar.allowed_attributes.get(tag)
attrdict = {}
for (att,value) in attrs.items():
prefix = nsdict.get(att[0],att[0])
# Check if it is a known extension
notan_extension = True
for product, ext_attrs in extension_attributes.items():
allowed_ext_attrs = ext_attrs.get(tag)
if allowed_ext_attrs and att in allowed_ext_attrs:
print_error("Warning: Attribute %s in element <%s> is illegal - %s extension" % ( make_qname(att), make_qname(tag), product))
notan_extension = False
# Check if it is an allowed attribute
if notan_extension and allowed_attrs and att not in allowed_attrs:
print_error("Error: Attribute %s:%s is not allowed in element <%s>" % ( prefix, att[1], make_qname(tag)))
# Check the value
try:
convert = attrconverters.get(att, cnv_string)
convert(att, value, tag)
except ValueError as res:
print_error("Error: Bad value '%s' for attribute %s:%s in tag: <%s> - %s" %
(chop_arg(value), prefix, att[1], make_qname(tag), res))
self.tagstack.append(tag)
self.data = []
# Check that the parent allows this child element
if tag not in ( (OFFICENS, 'document'), (OFFICENS, 'document-content'), (OFFICENS, 'document-styles'),
(OFFICENS, 'document-meta'), (OFFICENS, 'document-settings'),
(MANIFESTNS,'manifest')):
try:
parent = self.tagstack[-2]
allowed_children = grammar.allowed_children.get(parent)
except:
print_error("Error: This document starts with the wrong tag: <%s>" % make_qname(tag))
allowed_children = None
if allowed_children and tag not in allowed_children:
print_error("Error: Element %s is not allowed in element %s" % ( make_qname(tag), make_qname(parent)))
# Test that all mandatory attributes have been added.
required = grammar.required_attributes.get(tag)
if required:
for r in required:
if attrs.get(r) is None:
print_error("Error: Required attribute missing: %s in <%s>" % (make_qname(r), make_qname(tag)))
def endElementNS(self, tag, qname):
self.currtag = self.tagstack.pop()
str = ''.join(self.data).strip()
# Check that only elements that can take text have text
# But only elements we know exist in grammar
if tag in grammar.allowed_children:
if str != '' and tag not in grammar.allows_text:
print_error("Error: %s does not allow text data" % make_qname(tag))
self.data = []
class ODFDTDHandler(handler.DTDHandler):
def notationDecl(self, name, public_id, system_id):
""" Ignore DTDs """
print_error("Warning: ODF doesn't use DOCTYPEs")
def exitwithusage(exitcode=2):
""" print out usage information """
sys.stderr.write("Usage: %s inputfile\n" % sys.argv[0])
sys.stderr.write("\tInputfile must be OpenDocument format\n")
sys.exit(exitcode)
def lint(odffile):
if not zipfile.is_zipfile(odffile):
print_error("Error: This is not a zipped file")
return
zfd = zipfile.ZipFile(odffile)
try:
mimetype = zfd.read('mimetype')
except:
mimetype=''
d = OpenDocument(unicode(mimetype))
first = True
for zi in zfd.infolist():
if first:
if zi.filename == 'mimetype':
if zi.compress_type != zipfile.ZIP_STORED:
print_error("Error: The 'mimetype' member must be stored - not deflated")
if zi.comment != "":
print_error("Error: The 'mimetype' member must not have extra header info")
else:
print_error("Warning: The first member in the archive should be the mimetype")
first = False
if zi.filename in ('META-INF/manifest.xml', 'content.xml', 'meta.xml', 'styles.xml', 'settings.xml'):
content = zfd.read(zi.filename)
parser = make_parser()
parser.setFeature(handler.feature_namespaces, True)
parser.setFeature(handler.feature_external_ges, False)
parser.setContentHandler(ODFElementHandler(d))
dtdh = ODFDTDHandler()
parser.setDTDHandler(dtdh)
parser.setErrorHandler(handler.ErrorHandler())
inpsrc = InputSource()
if not isinstance(content, str):
content=content
inpsrc.setByteStream(BytesIO(content))
parser.parse(inpsrc)
if len(sys.argv) != 2:
exitwithusage()
lint(unicode(sys.argv[1]))
# Local Variables: ***
# mode: python ***
# End: ***

266
venv/bin/odfmeta Executable file
View File

@ -0,0 +1,266 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
import zipfile, time, sys, getopt, re
import xml.sax, xml.sax.saxutils
from odf.namespaces import TOOLSVERSION, OFFICENS, XLINKNS, DCNS, METANS
from io import BytesIO
OUTENCODING="utf-8"
whitespace = re.compile(r'\s+')
fields = {
'title': (DCNS,u'title'),
'description': (DCNS,u'description'),
'subject': (DCNS,u'subject'),
'creator': (DCNS,u'creator'),
'date': (DCNS,u'date'),
'language': (DCNS,u'language'),
'generator': (METANS,u'generator'),
'initial-creator': (METANS,u'initial-creator'),
'keyword': (METANS,u'keyword'),
'editing-duration': (METANS,u'editing-duration'),
'editing-cycles': (METANS,u'editing-cycles'),
'printed-by': (METANS,u'printed-by'),
'print-date': (METANS,u'print-date'),
'creation-date': (METANS,u'creation-date'),
'user-defined': (METANS,u'user-defined'),
#'template': (METANS,u'template'),
}
xfields = []
Xfields = []
addfields = {}
deletefields = {}
yieldfields = {}
showversion = None
def exitwithusage(exitcode=2):
""" print out usage information """
sys.stderr.write("Usage: %s [-cdlvV] [-xXaAI metafield]... [-o output] [inputfile]\n" % sys.argv[0])
sys.stderr.write("\tInputfile must be OpenDocument format\n")
sys.exit(exitcode)
def normalize(str):
"""
The normalize-space function returns the argument string with whitespace
normalized by stripping leading and trailing whitespace and replacing
sequences of whitespace characters by a single space.
"""
return whitespace.sub(' ', str).strip()
class MetaCollector:
"""
The MetaCollector is a pseudo file object, that can temporarily ignore write-calls
It could probably be replaced with a StringIO object.
"""
def __init__(self):
self._content = []
self.dowrite = True
def write(self, str):
if self.dowrite:
self._content.append(str)
def content(self):
return ''.join(self._content)
base = xml.sax.saxutils.XMLGenerator
class odfmetaparser(base):
""" Parse a meta.xml file with an event-driven parser and replace elements.
It would probably be a cleaner approach to use a DOM based parser and
then manipulate in memory.
Small issue: Reorders elements
"""
version = 'Unknown'
def __init__(self):
self._mimetype = ''
self.output = MetaCollector()
self._data = []
self.seenfields = {}
base.__init__(self, self.output, OUTENCODING)
def startElementNS(self, name, qname, attrs):
self._data = []
field = name
# I can't modify the template until the tool replaces elements at the same
# location and not at the end
# if name == (METANS,u'template'):
# self._data = [attrs.get((XLINKNS,u'title'),'')]
if showversion and name == (OFFICENS,u'document-meta'):
if showversion == '-V':
print ("version:%s" % attrs.get((OFFICENS,u'version'),'Unknown').decode('utf-8'))
else:
print ("%s" % attrs.get((OFFICENS,u'version'),'Unknown').decode('utf-8'))
if name == (METANS,u'user-defined'):
field = attrs.get((METANS,u'name'))
if field in deletefields:
self.output.dowrite = False
elif field in yieldfields:
del addfields[field]
base.startElementNS(self, name, qname, attrs)
else:
base.startElementNS(self, name, qname, attrs)
self._tag = field
def endElementNS(self, name, qname):
field = name
if name == (METANS,u'user-defined'):
field = self._tag
if name == (OFFICENS,u'meta'):
for k,v in addfields.items():
if len(v) > 0:
if type(k) == type(''):
base.startElementNS(self,(METANS,u'user-defined'),None,{(METANS,u'name'):k})
base.characters(self, v)
base.endElementNS(self, (METANS,u'user-defined'),None)
else:
base.startElementNS(self, k, None, {})
base.characters(self, v)
base.endElementNS(self, k, None)
if name in xfields:
print ("%s" % self.data())
if name in Xfields:
if isinstance(self._tag, tuple):
texttag = self._tag[1]
else:
texttag = self._tag
print ("%s:%s" % (texttag, self.data()))
if field in deletefields:
self.output.dowrite = True
else:
base.endElementNS(self, name, qname)
def characters(self, content):
base.characters(self, content)
self._data.append(content)
def meta(self):
return self.output.content()
def data(self):
if usenormalize:
return normalize(''.join(self._data))
else:
return ''.join(self._data)
now = time.localtime()[:6]
outputfile = "-"
writemeta = False # Do we change any meta data?
usenormalize = False
try:
opts, args = getopt.getopt(sys.argv[1:], "cdlvVI:A:a:o:x:X:")
except getopt.GetoptError:
exitwithusage()
if len(opts) == 0:
opts = [ ('-l','') ]
for o, a in opts:
if o in ('-a','-A','-I'):
writemeta = True
if a.find(":") >= 0:
k,v = a.split(":",1)
else:
k,v = (a, "")
if len(k) == 0:
exitwithusage()
k = fields.get(k,k)
addfields[k] = v
if o == '-a':
yieldfields[k] = True
if o == '-I':
deletefields[k] = True
if o == '-d':
writemeta = True
addfields[(DCNS,u'date')] = "%04d-%02d-%02dT%02d:%02d:%02d" % now
deletefields[(DCNS,u'date')] = True
if o == '-c':
usenormalize = True
if o in ('-v', '-V'):
showversion = o
if o == '-l':
Xfields = fields.values()
if o == "-x":
xfields.append(fields.get(a,a))
if o == "-X":
Xfields.append(fields.get(a,a))
if o == "-o":
outputfile = a
# The specification says we should change the element to our own,
# and must not export the original identifier.
if writemeta:
addfields[(METANS,u'generator')] = TOOLSVERSION
deletefields[(METANS,u'generator')] = True
odfs = odfmetaparser()
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_namespaces, 1)
parser.setContentHandler(odfs)
if len(args) == 0:
zin = zipfile.ZipFile(sys.stdin,'r')
else:
if not zipfile.is_zipfile(args[0]):
exitwithusage()
zin = zipfile.ZipFile(args[0], 'r')
try:
content = zin.read('meta.xml').decode('utf-8')
except:
sys.stderr.write("File has no meta data\n")
sys.exit(1)
parser.parse(BytesIO(content.encode('utf-8')))
if writemeta:
if outputfile == '-':
if sys.stdout.isatty():
sys.stderr.write("Won't write ODF file to terminal\n")
sys.exit(1)
zout = zipfile.ZipFile(sys.stdout,"w")
else:
zout = zipfile.ZipFile(outputfile,"w")
# Loop through the input zipfile and copy the content to the output until we
# get to the meta.xml. Then substitute.
for zinfo in zin.infolist():
if zinfo.filename == "meta.xml":
# Write meta
zi = zipfile.ZipInfo("meta.xml", now)
zi.compress_type = zipfile.ZIP_DEFLATED
zout.writestr(zi,odfs.meta() )
else:
payload = zin.read(zinfo.filename)
zout.writestr(zinfo, payload)
zout.close()
zin.close()
# Local Variables: ***
# mode: python ***
# End: ***

144
venv/bin/odfoutline Executable file
View File

@ -0,0 +1,144 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2006 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
from __future__ import print_function
import zipfile
from xml.sax import make_parser,handler
from xml.sax.xmlreader import InputSource
import xml.sax.saxutils
import sys
from odf.namespaces import TEXTNS, TABLENS, DRAWNS
try:
from cStringIO import StringIO
except ImportError:
from io import StringIO
def getxmlpart(odffile, xmlfile):
""" Get the content out of the ODT file"""
z = zipfile.ZipFile(odffile)
content = z.read(xmlfile)
z.close()
return content
#
# Extract headings from content.xml
#
class ODTHeadingHandler(handler.ContentHandler):
""" Extract headings from content.xml of an ODT file """
def __init__(self, eater):
self.r = eater
self.data = []
self.level = 0
def characters(self, data):
self.data.append(data)
def startElementNS(self, tag, qname, attrs):
if tag == (TEXTNS, 'h'):
self.level = 0
for (att,value) in attrs.items():
if att == (TEXTNS, 'outline-level'):
self.level = int(value)
self.data = []
def endElementNS(self, tag, qname):
if tag == (TEXTNS, 'h'):
str = ''.join(self.data)
self.data = []
self.r.append("%d%*s%s" % (self.level, self.level, '', str))
class ODTSheetHandler(handler.ContentHandler):
""" Extract sheet names from content.xml of an ODS file """
def __init__(self, eater):
self.r = eater
def startElementNS(self, tag, qname, attrs):
if tag == (TABLENS, 'table'):
sheetname = attrs.get((TABLENS, 'name'))
if sheetname:
self.r.append(sheetname)
class ODTSlideHandler(handler.ContentHandler):
""" Extract headings from content.xml of an ODT file """
def __init__(self, eater):
self.r = eater
self.data = []
self.pagenum = 0
def characters(self, data):
self.data.append(data)
def startElementNS(self, tag, qname, attrs):
if tag == (DRAWNS, 'page'):
self.pagenum = self.pagenum + 1
self.r.append("SLIDE %d: %s" % ( self.pagenum, attrs.get((DRAWNS, 'name'),'')))
if tag == (TEXTNS, 'p'):
self.data = []
def endElementNS(self, tag, qname):
if tag == (TEXTNS, 'p'):
str = ''.join(self.data)
self.data = []
if len(str) > 0:
self.r.append(" " + str)
def odtheadings(odtfile):
mimetype = getxmlpart(odtfile,'mimetype')
content = getxmlpart(odtfile,'content.xml')
lines = []
parser = make_parser()
parser.setFeature(handler.feature_namespaces, 1)
if not isinstance(mimetype, str):
mimetype=mimetype.decode("utf-8")
if mimetype in ('application/vnd.oasis.opendocument.text',
'application/vnd.oasis.opendocument.text-template'):
parser.setContentHandler(ODTHeadingHandler(lines))
elif mimetype in ('application/vnd.oasis.opendocument.spreadsheet',
'application/vnd.oasis.opendocument.spreadsheet-template'):
parser.setContentHandler(ODTSheetHandler(lines))
elif mimetype in ('application/vnd.oasis.opendocument.presentation',
'application/vnd.oasis.opendocument.presentation-template'):
parser.setContentHandler(ODTSlideHandler(lines))
else:
print ("Unsupported fileformat")
sys.exit(2)
parser.setErrorHandler(handler.ErrorHandler())
inpsrc = InputSource()
if not isinstance(content, str):
content=content.decode("utf-8")
inpsrc.setByteStream(StringIO(content))
parser.parse(inpsrc)
return lines
if __name__ == "__main__":
filler = " "
for heading in odtheadings(sys.argv[1]):
print (heading)
# Local Variables: ***
# mode: python ***
# End: ***

101
venv/bin/odfuserfield Executable file
View File

@ -0,0 +1,101 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s): Michael Howitz, gocept gmbh & co. kg
import sys
import getopt
import odf.userfield
if sys.version_info[0]==3: unicode=str
listfields = False
Listfields = False
xfields = []
Xfields = []
setfields = {}
outputfile = None
inputfile = None
def exitwithusage(exitcode=2):
""" print out usage information """
sys.stderr.write("Usage: %s [-lL] [-xX metafield] [-s metafield:value]... "
"[-o output] [inputfile]\n" % sys.argv[0])
sys.stderr.write("\tInputfile must be OpenDocument format\n")
sys.exit(exitcode)
try:
opts, args = getopt.getopt(sys.argv[1:], "lLs:o:x:X:")
except getopt.GetoptError:
exitwithusage()
if len(opts) == 0:
exitwithusage()
for o, a in opts:
if o == '-s':
if a.find(":") >= 0:
k,v = a.split(":",1)
else:
k,v = (a, "")
if len(k) == 0:
exitwithusage()
setfields[unicode(k)] = unicode(v)
if o == '-l':
listfields = True
Listfields = False
if o == '-L':
Listfields = True
listfields = False
if o == "-x":
xfields.append(unicode(a))
if o == "-X":
Xfields.append(unicode(a))
if o == "-o":
outputfile = unicode(a)
if len(args) != 0:
inputfile = unicode(args[0])
user_fields = odf.userfield.UserFields(inputfile, outputfile)
if xfields:
for value in user_fields.list_values(xfields):
print (value)
if Listfields or Xfields:
if Listfields:
Xfields = None
for field_name, value_type, value in user_fields.list_fields_and_values(
Xfields):
print ("%s#%s:%s" % (field_name, value_type, value))
if listfields:
for value in user_fields.list_fields():
print (value)
if setfields:
user_fields.update(setfields)
# Local Variables: ***
# mode: python ***
# End: ***

10
venv/bin/pip Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/pip3 Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/pip3.10 Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/pygmentize Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from pygments.cmdline import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

1
venv/bin/python Symbolic link
View File

@ -0,0 +1 @@
python3

1
venv/bin/python3 Symbolic link
View File

@ -0,0 +1 @@
/usr/bin/python3

1
venv/bin/python3.10 Symbolic link
View File

@ -0,0 +1 @@
python3

10
venv/bin/rdf2dot Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from rdflib.tools.rdf2dot import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/rdfgraphisomorphism Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from rdflib.tools.graphisomorphism import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/rdfpipe Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from rdflib.tools.rdfpipe import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/rdfs2dot Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from rdflib.tools.rdfs2dot import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/rqw Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from SPARQLWrapper.main import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/spacy Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from spacy.cli import setup_cli
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(setup_cli())

10
venv/bin/tqdm Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from tqdm.cli import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/typer Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from typer.cli import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

10
venv/bin/weasel Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
'''exec' "/home/jarnold/projects/GND-Skript Test/venv/bin/python3" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
from weasel.cli import app
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(app())

241
venv/bin/xml2odf Executable file
View File

@ -0,0 +1,241 @@
#!/home/jarnold/projects/GND-Skript Test/venv/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2006 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
#
# OpenDocument can be a complete office document in a single
# XML document. This script will take such a document and create
# a package
import io
import zipfile,time, sys, getopt
import xml.sax, xml.sax.saxutils
from odf import manifest
class SplitWriter:
def __init__(self):
self.activefiles = []
self._content = []
self._meta = []
self._styles = []
self._settings = []
self.files = {'content': self._content, 'meta': self._meta,
'styles':self._styles, 'settings': self._settings }
def write(self, str):
for f in self.activefiles:
f.append(str)
def activate(self, filename):
file = self.files[filename]
if file not in self.activefiles:
self.activefiles.append(file)
def deactivate(self, filename):
file = self.files[filename]
if file in self.activefiles:
self.activefiles.remove(file)
odmimetypes = {
'application/vnd.oasis.opendocument.text': '.odt',
'application/vnd.oasis.opendocument.text-template': '.ott',
'application/vnd.oasis.opendocument.graphics': '.odg',
'application/vnd.oasis.opendocument.graphics-template': '.otg',
'application/vnd.oasis.opendocument.presentation': '.odp',
'application/vnd.oasis.opendocument.presentation-template': '.otp',
'application/vnd.oasis.opendocument.spreadsheet': '.ods',
'application/vnd.oasis.opendocument.spreadsheet-template': '.ots',
'application/vnd.oasis.opendocument.chart': '.odc',
'application/vnd.oasis.opendocument.chart-template': '.otc',
'application/vnd.oasis.opendocument.image': '.odi',
'application/vnd.oasis.opendocument.image-template': '.oti',
'application/vnd.oasis.opendocument.formula': '.odf',
'application/vnd.oasis.opendocument.formula-template': '.otf',
'application/vnd.oasis.opendocument.text-master': '.odm',
'application/vnd.oasis.opendocument.text-web': '.oth',
}
OFFICENS = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0"
base = xml.sax.saxutils.XMLGenerator
class odfsplitter(base):
def __init__(self):
self._mimetype = ''
self.output = SplitWriter()
self._prefixes = []
base.__init__(self, self.output, 'utf-8')
def startPrefixMapping(self, prefix, uri):
base.startPrefixMapping(self, prefix, uri)
self._prefixes.append('xmlns:%s="%s"' % (prefix, uri))
def startElementNS(self, name, qname, attrs):
if name == (OFFICENS, u"document"):
self._mimetype = attrs.get((OFFICENS, "mimetype"))
elif name == (OFFICENS, u"meta"):
self.output.activate('meta')
elif name == (OFFICENS, u"settings"):
self.output.activate('settings')
elif name == (OFFICENS, u"scripts"):
self.output.activate('content')
elif name == (OFFICENS, u"font-face-decls"):
self.output.activate('content')
self.output.activate('styles')
elif name == (OFFICENS, u"styles"):
self.output.activate('styles')
elif name == (OFFICENS, u"automatic-styles"):
self.output.activate('content')
self.output.activate('styles')
elif name == (OFFICENS, u"master-styles"):
self.output.activate('styles')
elif name == (OFFICENS, u"body"):
self.output.activate('content')
base.startElementNS(self, name, qname, attrs)
def endElementNS(self, name, qname):
base.endElementNS(self, name, qname)
if name == (OFFICENS, u"meta"):
self.output.deactivate('meta')
elif name == (OFFICENS, u"settings"):
self.output.deactivate('settings')
elif name == (OFFICENS, u"scripts"):
self.output.deactivate('content')
elif name == (OFFICENS, u"font-face-decls"):
self.output.deactivate('content')
self.output.deactivate('styles')
elif name == (OFFICENS, u"styles"):
self.output.deactivate('styles')
elif name == (OFFICENS, u"automatic-styles"):
self.output.deactivate('content')
self.output.deactivate('styles')
elif name == (OFFICENS, u"master-styles"):
self.output.deactivate('styles')
elif name == (OFFICENS, u"body"):
self.output.deactivate('content')
def content(self):
""" Return the content inside a wrapper called <office:document-content>
"""
prefixes = ' '.join(self._prefixes)
return ''.join(['<?xml version="1.0" encoding="UTF-8"?>\n<office:document-content %s office:version="1.0">' % prefixes] + list(map(lambda x: x.decode("utf-8"), self.output._content)) + ['</office:document-content>'])
def settings(self):
prefixes = ' '.join(self._prefixes).encode('utf-8')
return ''.join( ['<?xml version="1.0" encoding="UTF-8"?>\n<office:document-settings %s office:version="1.0">' % prefixes] + self.output._settings + ['''</office:document-settings>'''])
def styles(self):
prefixes = ' '.join(self._prefixes)
return ''.join( ['<?xml version="1.0" encoding="UTF-8"?>\n<office:document-styles %s office:version="1.0">' % prefixes] + list(map(lambda x: x.decode("utf-8"), self.output._styles)) + ['''</office:document-styles>'''])
def meta(self):
prefixes = ' '.join(self._prefixes)
return ''.join( ['<?xml version="1.0" encoding="UTF-8"?>\n<office:document-meta %s office:version="1.0">' % prefixes] + list(map(lambda x: x.decode("utf-8"), self.output._meta)) + ['''</office:document-meta>'''])
def usage():
sys.stderr.write("Usage: %s [-o outputfile] [-s] inputfile\n" % sys.argv[0])
def manifestxml(m):
""" Generates the content of the manifest.xml file """
xml=io.StringIO()
xml.write(u"<?xml version='1.0' encoding='UTF-8'?>\n")
m.toXml(0,xml)
return xml.getvalue()
try:
opts, args = getopt.getopt(sys.argv[1:], "o:s", ["output=","suffix"])
except getopt.GetoptError:
usage()
sys.exit(2)
outputfile = '-'
addsuffix = False
for o, a in opts:
if o in ("-o", "--output"):
outputfile = a
if o in ("-s", "--suffix"):
addsuffix = True
if len(args) > 1:
usage()
sys.exit(2)
odfs = odfsplitter()
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_namespaces, 1)
parser.setContentHandler(odfs)
if len(args) == 0:
parser.parse(sys.stdin)
else:
parser.parse(open(args[0],"r"))
mimetype = odfs._mimetype
suffix = odmimetypes.get(mimetype,'.xxx')
if outputfile == '-':
if sys.stdout.isatty():
sys.stderr.write("Won't write ODF file to terminal\n")
sys.exit(1)
z = zipfile.ZipFile(sys.stdout,"w")
else:
if addsuffix:
outputfile = outputfile + suffix
z = zipfile.ZipFile(outputfile,"w")
now = time.localtime()[:6]
# Write mimetype
zi = zipfile.ZipInfo('mimetype', now)
zi.compress_type = zipfile.ZIP_STORED
z.writestr(zi,mimetype)
# Write content
zi = zipfile.ZipInfo("content.xml", now)
zi.compress_type = zipfile.ZIP_DEFLATED
z.writestr(zi,odfs.content() )
# Write styles
zi = zipfile.ZipInfo("styles.xml", now)
zi.compress_type = zipfile.ZIP_DEFLATED
z.writestr(zi,odfs.styles() )
# Write meta
zi = zipfile.ZipInfo("meta.xml", now)
zi.compress_type = zipfile.ZIP_DEFLATED
z.writestr(zi,odfs.meta() )
m = manifest.Manifest()
m.addElement(manifest.FileEntry(fullpath="/", mediatype=mimetype))
m.addElement(manifest.FileEntry(fullpath="content.xml",mediatype="text/xml"))
m.addElement(manifest.FileEntry(fullpath="styles.xml", mediatype="text/xml"))
m.addElement(manifest.FileEntry(fullpath="meta.xml", mediatype="text/xml"))
# Write manifest
zi = zipfile.ZipInfo("META-INF/manifest.xml", now)
zi.compress_type = zipfile.ZIP_DEFLATED
z.writestr(zi, manifestxml(m).encode("utf-8") )
z.close()
# Local Variables: ***
# mode: python ***
# End: ***

View File

@ -0,0 +1,28 @@
Copyright 2010 Pallets
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,92 @@
Metadata-Version: 2.1
Name: MarkupSafe
Version: 3.0.2
Summary: Safely add untrusted strings to HTML/XML markup.
Maintainer-email: Pallets <contact@palletsprojects.com>
License: Copyright 2010 Pallets
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Project-URL: Donate, https://palletsprojects.com/donate
Project-URL: Documentation, https://markupsafe.palletsprojects.com/
Project-URL: Changes, https://markupsafe.palletsprojects.com/changes/
Project-URL: Source, https://github.com/pallets/markupsafe/
Project-URL: Chat, https://discord.gg/pallets
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Web Environment
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
Classifier: Topic :: Text Processing :: Markup :: HTML
Classifier: Typing :: Typed
Requires-Python: >=3.9
Description-Content-Type: text/markdown
License-File: LICENSE.txt
# MarkupSafe
MarkupSafe implements a text object that escapes characters so it is
safe to use in HTML and XML. Characters that have special meanings are
replaced so that they display as the actual characters. This mitigates
injection attacks, meaning untrusted user input can safely be displayed
on a page.
## Examples
```pycon
>>> from markupsafe import Markup, escape
>>> # escape replaces special characters and wraps in Markup
>>> escape("<script>alert(document.cookie);</script>")
Markup('&lt;script&gt;alert(document.cookie);&lt;/script&gt;')
>>> # wrap in Markup to mark text "safe" and prevent escaping
>>> Markup("<strong>Hello</strong>")
Markup('<strong>hello</strong>')
>>> escape(Markup("<strong>Hello</strong>"))
Markup('<strong>hello</strong>')
>>> # Markup is a str subclass
>>> # methods and operators escape their arguments
>>> template = Markup("Hello <em>{name}</em>")
>>> template.format(name='"World"')
Markup('Hello <em>&#34;World&#34;</em>')
```
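As a further illustration of the operator behaviour noted in the comments above (this snippet is not part of the upstream README; the outputs assume MarkupSafe's documented escaping of operator arguments):

```pycon
>>> from markupsafe import Markup
>>> # % and + also escape their operands
>>> Markup("<em>%s</em>") % "<untrusted>"
Markup('<em>&lt;untrusted&gt;</em>')
>>> Markup("<p>") + "<untrusted>"
Markup('<p>&lt;untrusted&gt;')
```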
## Donate
The Pallets organization develops and supports MarkupSafe and other
popular packages. In order to grow the community of contributors and
users, and allow the maintainers to devote more time to the projects,
[please donate today][].
[please donate today]: https://palletsprojects.com/donate

View File

@ -0,0 +1,14 @@
MarkupSafe-3.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
MarkupSafe-3.0.2.dist-info/LICENSE.txt,sha256=SJqOEQhQntmKN7uYPhHg9-HTHwvY-Zp5yESOf_N9B-o,1475
MarkupSafe-3.0.2.dist-info/METADATA,sha256=aAwbZhSmXdfFuMM-rEHpeiHRkBOGESyVLJIuwzHP-nw,3975
MarkupSafe-3.0.2.dist-info/RECORD,,
MarkupSafe-3.0.2.dist-info/WHEEL,sha256=_kVlewavvOSnwZE_whBk3jlE_Ob-nL5GvlVcLkpXSD8,151
MarkupSafe-3.0.2.dist-info/top_level.txt,sha256=qy0Plje5IJuvsCBjejJyhDCjEAdcDLK_2agVcex8Z6U,11
markupsafe/__init__.py,sha256=sr-U6_27DfaSrj5jnHYxWN-pvhM27sjlDplMDPZKm7k,13214
markupsafe/__pycache__/__init__.cpython-310.pyc,,
markupsafe/__pycache__/_native.cpython-310.pyc,,
markupsafe/_native.py,sha256=hSLs8Jmz5aqayuengJJ3kdT5PwNpBWpKrmQSdipndC8,210
markupsafe/_speedups.c,sha256=O7XulmTo-epI6n2FtMVOrJXl8EAaIwD2iNYmBI5SEoQ,4149
markupsafe/_speedups.cpython-310-x86_64-linux-gnu.so,sha256=x4RoxWgyqAEokk-AZrWvrLDxLE-dm-zZSZYV_gOiLJA,34976
markupsafe/_speedups.pyi,sha256=ENd1bYe7gbBUf2ywyYWOGUpnXOHNJ-cgTNqetlW8h5k,41
markupsafe/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

View File

@ -0,0 +1,6 @@
Wheel-Version: 1.0
Generator: setuptools (75.2.0)
Root-Is-Purelib: false
Tag: cp310-cp310-manylinux_2_17_x86_64
Tag: cp310-cp310-manylinux2014_x86_64

View File

@ -0,0 +1 @@
markupsafe

View File

@ -0,0 +1,37 @@
# Authors
* Ivan Herman ([@iherman](http://github.com/iherman))
* Sergio Fernández ([@wikier](http://github.com/wikier))
* Carlos Tejo ([@dayures](http://github.com/dayures))
* Alexey Zakhlestin ([@indeyets](http://github.com/indeyets))
# Contributors
See https://github.com/RDFLib/sparqlwrapper/graphs/contributors
* [@eggplants](https://github.com/eggplants): most things to make 2.0.0 happen
* Obey Arthur Liu ([@ArthurLiu](http://github.com/ArthurLiu)): different patches
* Christopher Lenz ([@cmlenz](http://github.com/cmlenz)): feature to allow developers to choose the json module
* Pēteris Caune ([@cuu508](http://github.com/cuu508)): great feedback and patches
* Bogdan Benea ([bugdone@users.sourceforge.net](mailto:bugdone@users.sourceforge.net)): patch for the query regular expression
* William Waites ([@wwaites](http://github.com/wwaites)): patches for RDFLib3
* Christoph Burgmer ([@cburgmer](http://github.com/cburgmer)): patches for RDFLib3
* Thomas Kluyver ([@takluyver](http://github.com/takluyver)): patches for Python 3.x
* Diego Berrueta ([@berrueta](http://github.com/berrueta)): new function for printing results as table
* Olivier Berger ([@olberger](http://github.com/olberger)): patch regarding raw response for unknown formats
* Benjamin Cogrel ([@bcogrel](http://github.com/bcogrel)): standard query types
* Urs Holzer ([@uholzer](http://github.com/uholzer)): features, patches and testing
* Alf Lervåg ([@alf](http://github.com/alf)): setup patch
* Nolan Nichols ([@nicholsn](http://github.com/nicholsn)): HTTP digest auth support
* Kevin Turner ([@keturn](https://github.com/keturn)): `SmartWrapper.Value.__repr__()` implementation
* Marcelo Jorge Vieira ([@marcelometal](https://github.com/marcelometal)): typos
* Trevor Andersen ([@trevorandersen](https://github.com/trevorandersen)): patches for Python 3.x
* Carlos Martinez-Ortiz ([@cmartinez](https://github.com/cmartinez)): improved support for the return format HTTP parameter
* Christian Amsüss ([@chrysn](https://github.com/chrysn)): dependency fixes
* Chris Lamb ([@lamby](https://github.com/lamby)): typo
* Hugo van Kemenade ([@hugovk](https://github.com/hugovk)): update classifiers (Python 3.6)
* Edward Betts ([@EdwardBetts](https://github.com/EdwardBetts)): Correct spelling mistakes
* Carlos Martínez ([@c-martinez](https://github.com/c-martinez)): Mainly support for CSV and TSV results in SPARQL SELECT queries
* Dan Michael O. Heggø ([@danmichaelo](https://github.com/danmichaelo)): update README with SPARQLWrapper2 example
* Sam Clements ([@borntyping](https://github.com/borntyping)): provide hints about setting the timeout properly
* Marc Feger ([@MaFeg100](https://github.com/MaFeg100)): improvements and tests for development

View File

@ -0,0 +1,18 @@
SPARQL Python Wrapper is released under the W3C® SOFTWARE NOTICE AND LICENSE.
This work (and included software, documentation such as READMEs, or other related items) is being provided by the copyright holders under the following license. By obtaining, using and/or copying this work, you (the licensee) agree that you have read, understood, and will comply with the following terms and conditions.
Permission to copy, modify, and distribute this software and its documentation, with or without modification, for any purpose and without fee or royalty is hereby granted, provided that you include the following on ALL copies of the software and documentation or portions thereof, including modifications:
1. The full text of this NOTICE in a location viewable to users of the redistributed or derivative work.
2. Any pre-existing intellectual property disclaimers, notices, or terms and conditions. If none exist, the W3C Software Short Notice should be included (hypertext is preferred, text is permitted) within the body of any redistributed or derivative code.
3. Notice of any changes or modifications to the files, including the date changes were made. (We recommend you provide URIs to the location from which the code is derived.)
THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR DOCUMENTATION.
The name and trademarks of copyright holders may NOT be used in advertising or publicity pertaining to the software without specific, written prior permission. Title to copyright in this software and any associated documentation will at all times remain with copyright holders.
See also http://www.w3.org/Consortium/Legal/copyright-software for further details

View File

@ -0,0 +1,45 @@
Metadata-Version: 2.1
Name: SPARQLWrapper
Version: 2.0.0
Summary: SPARQL Endpoint interface to Python
Home-page: http://rdflib.github.io/sparqlwrapper
Download-URL: https://github.com/RDFLib/sparqlwrapper/releases
Author: Ivan Herman, Sergio Fernández, Carlos Tejo Alonso, Alexey Zakhlestin
Author-email: rdflib-dev@googlegroups.com
License: W3C SOFTWARE NOTICE AND LICENSE
Project-URL: Home, https://rdflib.github.io/sparqlwrapper
Project-URL: Documentation, https://sparqlwrapper.readthedocs.io
Project-URL: Source, https://github.com/RDFLib/sparqlwrapper
Project-URL: Tracker, https://github.com/RDFLib/sparqlwrapper/issues
Keywords: python,sparql,rdf,rdflib
Platform: any
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: W3C License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.7
License-File: LICENSE.txt
License-File: AUTHORS.md
Requires-Dist: rdflib (>=6.1.1)
Provides-Extra: dev
Requires-Dist: setuptools (>=3.7.1) ; extra == 'dev'
Requires-Dist: mypy (>=0.931) ; extra == 'dev'
Requires-Dist: pandas (>=1.3.5) ; extra == 'dev'
Requires-Dist: pandas-stubs (>=1.2.0.48) ; extra == 'dev'
Provides-Extra: docs
Requires-Dist: sphinx (<5) ; extra == 'docs'
Requires-Dist: sphinx-rtd-theme ; extra == 'docs'
Provides-Extra: keepalive
Requires-Dist: keepalive (>=0.5) ; extra == 'keepalive'
Provides-Extra: pandas
Requires-Dist: pandas (>=1.3.5) ; extra == 'pandas'
This is a wrapper around a SPARQL service. It helps in creating the query URI and, possibly, converting the result into a more manageable format.
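For orientation, a minimal usage sketch of the wrapper described here, following the API used by SPARQLWrapper/main.py later in this diff; the endpoint and query are illustrative only:

```python
from SPARQLWrapper import SPARQLWrapper, JSON

sparql = SPARQLWrapper("http://dbpedia.org/sparql")  # illustrative endpoint
sparql.setQuery("SELECT ?s WHERE { ?s ?p ?o } LIMIT 5")
sparql.setReturnFormat(JSON)

results = sparql.query().convert()  # JSON results arrive as a dict
for binding in results["results"]["bindings"]:
    print(binding["s"]["value"])
```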

View File

@ -0,0 +1,25 @@
../../../bin/rqw,sha256=qf6Nvwhjovp_uPIPeeMNocB3j7iZ_YnskuMQcUK6DYY,291
SPARQLWrapper-2.0.0.dist-info/AUTHORS.md,sha256=7oV4hamlTbjfsaWy15f3BVH2h90Nf5mJ-rR0Z1azy9s,2725
SPARQLWrapper-2.0.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
SPARQLWrapper-2.0.0.dist-info/LICENSE.txt,sha256=Z1IX12CEodcefDAOAMJ7irELJAX-huUCOiuzio5G8Ik,2134
SPARQLWrapper-2.0.0.dist-info/METADATA,sha256=kU92L4KNVjo9aP6-jm4FXVAUpNScd5mIWWbIGHu_D_I,2020
SPARQLWrapper-2.0.0.dist-info/RECORD,,
SPARQLWrapper-2.0.0.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
SPARQLWrapper-2.0.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
SPARQLWrapper-2.0.0.dist-info/entry_points.txt,sha256=aIYAzonEA7winfiw8NydOLNu406HC6aRBlKLI2H5kEQ,48
SPARQLWrapper-2.0.0.dist-info/top_level.txt,sha256=3KluNiTwOkX16hLJwC3UEYzKdEscknK--UV5q56mYWY,14
SPARQLWrapper/KeyCaseInsensitiveDict.py,sha256=JF83-6EPbcm9F4gg0GQ11vTVuLzdJ7sDsubEP9j-3zw,1377
SPARQLWrapper/SPARQLExceptions.py,sha256=qFlU175hp61gO6bvgQsCdSTEGOFnJwJNBQlIGS5W7-o,2595
SPARQLWrapper/SmartWrapper.py,sha256=GxZiMGZpGppPZX54W-YdUtcdAAa83GJjPLdyfLWPK-4,15557
SPARQLWrapper/Wrapper.py,sha256=M9lTPkpvRU2xAUbrHiKYK0mEV8pkycNS3lPoO__0gSE,58238
SPARQLWrapper/__init__.py,sha256=6kU9hD9FnlFbk2c8uFkpGb1arB3268nN74RUh91e60s,1213
SPARQLWrapper/__pycache__/KeyCaseInsensitiveDict.cpython-310.pyc,,
SPARQLWrapper/__pycache__/SPARQLExceptions.cpython-310.pyc,,
SPARQLWrapper/__pycache__/SmartWrapper.cpython-310.pyc,,
SPARQLWrapper/__pycache__/Wrapper.cpython-310.pyc,,
SPARQLWrapper/__pycache__/__init__.cpython-310.pyc,,
SPARQLWrapper/__pycache__/main.cpython-310.pyc,,
SPARQLWrapper/__pycache__/sparql_dataframe.cpython-310.pyc,,
SPARQLWrapper/main.py,sha256=MKNPMrFxIGN_A7-UwyMS_AycjswscgKsP37h2K2df8k,4330
SPARQLWrapper/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
SPARQLWrapper/sparql_dataframe.py,sha256=-oM7_eXbwGgeNkFv9mSxe3JWHM3xQQk90nNrbhthnrI,2429

View File

@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Root-Is-Purelib: true
Tag: py3-none-any

View File

@ -0,0 +1,2 @@
[console_scripts]
rqw = SPARQLWrapper.main:main

View File

@ -0,0 +1 @@
SPARQLWrapper

View File

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
"""
A simple implementation of a key case-insensitive dictionary.
..
Developers involved:
* Ivan Herman <http://www.ivan-herman.net>
* Sergio Fernández <http://www.wikier.org>
* Carlos Tejo Alonso <http://www.dayures.net>
* Alexey Zakhlestin <https://indeyets.ru/>
Organizations involved:
* `World Wide Web Consortium <http://www.w3.org>`_
* `Foundation CTIC <http://www.fundacionctic.org/>`_
:license: `W3C® Software notice and license <http://www.w3.org/Consortium/Legal/copyright-software>`_
"""
from typing import Dict, Mapping, TypeVar
_V = TypeVar("_V")
class KeyCaseInsensitiveDict(Dict[str, _V]):
"""
A simple implementation of a key case-insensitive dictionary
"""
def __init__(self, d: Mapping[str, _V]={}) -> None:
"""
:param dict d: The source dictionary.
"""
for k, v in d.items():
self[k] = v
def __setitem__(self, key: str, value: _V) -> None:
if hasattr(key, "lower"):
key = key.lower()
dict.__setitem__(self, key, value)
def __getitem__(self, key: str) -> _V:
if hasattr(key, "lower"):
key = key.lower()
return dict.__getitem__(self, key)
def __delitem__(self, key: str) -> None:
if hasattr(key, "lower"):
key = key.lower()
dict.__delitem__(self, key)
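A brief sketch of the behaviour implemented above: keys are lower-cased both when stored and when looked up, so mixed-case lookups resolve to the same entry.

```python
from SPARQLWrapper.KeyCaseInsensitiveDict import KeyCaseInsensitiveDict

d = KeyCaseInsensitiveDict({"Content-Type": "application/sparql-results+json"})
print(d["content-type"])  # 'application/sparql-results+json'
print(d["CONTENT-TYPE"])  # same entry: lookup keys are lower-cased too
d["Accept"] = "text/turtle"
print(d["accept"])        # 'text/turtle'
# Note: __contains__ is not overridden, so `"Accept" in d` is False
# while `"accept" in d` and d["Accept"] both work.
```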

View File

@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
"""
SPARQL Wrapper exceptions
..
Developers involved:
* Ivan Herman <http://www.ivan-herman.net>
* Sergio Fernández <http://www.wikier.org>
* Carlos Tejo Alonso <http://www.dayures.net>
* Alexey Zakhlestin <https://indeyets.ru/>
Organizations involved:
* `World Wide Web Consortium <http://www.w3.org>`_
* `Foundation CTIC <http://www.fundacionctic.org/>`_
:license: `W3C® Software notice and license <http://www.w3.org/Consortium/Legal/copyright-software>`_
"""
from typing import Optional
class SPARQLWrapperException(Exception):
"""
Base class for SPARQL Wrapper exceptions
"""
msg = "an exception has occurred"
def __init__(self, response: Optional[bytes] = None):
"""
:param string response: The server response
"""
if response:
formatted_msg = "%s: %s. \n\nResponse:\n%r" % (
self.__class__.__name__,
self.msg,
response,
)
else:
formatted_msg = "%s: %s." % (self.__class__.__name__, self.msg)
super(SPARQLWrapperException, self).__init__(formatted_msg)
class EndPointInternalError(SPARQLWrapperException):
"""
Exception type for Internal Server Error responses. Usually HTTP response status code ``500``.
"""
msg = "The endpoint returned the HTTP status code 500"
class QueryBadFormed(SPARQLWrapperException):
"""
Query Bad Formed exception. Usually HTTP response status code ``400``.
"""
msg = "A bad request has been sent to the endpoint: probably the SPARQL query is badly formed"
class EndPointNotFound(SPARQLWrapperException):
"""
End Point Not Found exception. Usually HTTP response status code ``404``.
"""
msg = "It was not possible to connect to the given endpoint: check it is correct"
class Unauthorized(SPARQLWrapperException):
"""
Access is denied due to invalid credentials (unauthorized). Usually HTTP response status code ``401``.
.. versionadded:: 1.8.2
"""
msg = "Access to that endpoint is denied due to invalid credentials (unauthorized). Check the credentials"
class URITooLong(SPARQLWrapperException):
"""
The URI requested by the client is longer than the server is willing to interpret. Usually HTTP response
status code ``414``.
.. versionadded:: 1.8.3
"""
msg = (
"The URI requested by the client is longer than the server is willing to interpret. "
"Check if the request was sent using GET method instead of POST method."
)
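A sketch of how calling code typically handles these exceptions; the endpoint and query are placeholders, and the HTTP-status mapping in the comments follows the class docstrings above:

```python
from SPARQLWrapper import SPARQLWrapper, JSON
from SPARQLWrapper.SPARQLExceptions import (
    EndPointNotFound,
    QueryBadFormed,
    SPARQLWrapperException,
)

sparql = SPARQLWrapper("http://example.org/sparql")  # placeholder endpoint
sparql.setQuery("SELECT * WHERE { ?s ?p ?o } LIMIT 1")
sparql.setReturnFormat(JSON)

try:
    results = sparql.query().convert()
except QueryBadFormed as err:          # usually HTTP 400
    print("Bad query:", err)
except EndPointNotFound as err:        # usually HTTP 404
    print("Endpoint not found:", err)
except SPARQLWrapperException as err:  # any other wrapper-level error
    print("SPARQL error:", err)
```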

View File

@ -0,0 +1,366 @@
# -*- coding: utf-8 -*-
"""
..
Developers involved:
* Ivan Herman <http://www.ivan-herman.net>
* Sergio Fernández <http://www.wikier.org>
* Carlos Tejo Alonso <http://www.dayures.net>
* Alexey Zakhlestin <https://indeyets.ru/>
Organizations involved:
* `World Wide Web Consortium <http://www.w3.org>`_
* `Foundation CTIC <http://www.fundacionctic.org/>`_
:license: `W3C® Software notice and license <http://www.w3.org/Consortium/Legal/copyright-software>`_
:requires: `RDFLib <https://rdflib.readthedocs.io>`_ package.
"""
from typing import Any, Dict, List, Optional, Tuple, Union
from SPARQLWrapper.Wrapper import JSON, SELECT, QueryResult
from SPARQLWrapper.Wrapper import SPARQLWrapper as SW
######################################################################################
class Value(object):
"""
Class encapsulating a single binding for a variable.
:ivar variable: The original variable, stored for an easier reference.
:vartype variable: string
:ivar value: Value of the binding.
:vartype value: string
:ivar type: Type of the binding. One of :attr:`Value.URI`, :attr:`Value.Literal`, :attr:`Value.TypedLiteral`, or
:attr:`Value.BNODE`.
:vartype type: string
:ivar lang: Language tag of the binding, or ``None`` if not set.
:vartype lang: string
:ivar datatype: Datatype of the binding, or ``None`` if not set. It is a URI.
:vartype datatype: string
"""
URI = "uri"
"""the string denoting a URI variable."""
Literal = "literal"
"""the string denoting a Literal variable."""
TypedLiteral = "typed-literal"
"""the string denoting a typed literal variable."""
BNODE = "bnode"
"""the string denoting a blank node variable."""
def __init__(self, variable: str, binding: Dict[str, str]) -> None:
"""
:param variable: the variable for that binding. Stored for an easier reference.
:type variable: string
:param binding: the binding dictionary part of the return result for a specific binding.
:type binding: dict
"""
self.variable = variable
self.value = binding["value"]
self.type = binding["type"]
self.lang = None
self.datatype = None
try:
self.lang = binding["xml:lang"]
except:
# no lang is set
pass
try:
self.datatype = binding["datatype"]
except:
pass
def __repr__(self) -> str:
cls = self.__class__.__name__
return "%s(%s:%r)" % (cls, self.type, self.value)
######################################################################################
class Bindings(object):
"""
Class encapsulating one query result, based on the JSON return format. It decodes the
return values to make it a bit more usable for a standard usage. The class consumes the
return value and instantiates a number of attributes that can be consulted directly. See
the list of variables.
The `Serializing SPARQL Query Results in JSON <http://www.w3.org/TR/rdf-sparql-json-res/>`_ explains the details of
the JSON return structures. Very succinctly: the return data has "bindings", which means a list of dictionaries.
Each dictionary is a possible binding of the SELECT variables to :class:`Value` instances. This structure is made a
bit more usable by this class.
:ivar fullResult: The original dictionary of the results, stored for an easier reference.
:vartype fullResult: dict
:ivar head: Header part of the return, see the JSON return format document for details.
:vartype head: dict
:ivar variables: List of the unbound variables of the original query. It is a list of strings. ``None`` in the case
of an ASK query.
:vartype variables: list
:ivar bindings: The final bindings: list of dictionaries, mapping variables to :class:`Value` instances. \
If unbound, then no value is set in the dictionary; that can be easily checked with \
``var in res.bindings[..]``, for example.
:vartype bindings: list
:ivar askResult: by default, set to **False**; in case of an ASK query, the result of the query.
:vartype askResult: bool
"""
def __init__(self, retval: QueryResult):
"""
:param retval: the query result.
:type retval: :class:`QueryResult<SPARQLWrapper.Wrapper.QueryResult>`
"""
self.fullResult = retval._convertJSON()
self.head = self.fullResult["head"]
self.variables: Optional[List[str]] = None
try:
self.variables = self.fullResult["head"]["vars"]
except:
pass
self.bindings: List[Dict[str, Value]] = []
try:
for b in self.fullResult["results"]["bindings"]:
# This is a single binding. It is a dictionary per variable; each value is a dictionary again
# that has to be converted into a Value instance
newBind = {}
# type error: Item "None" of "Union[List[str], Any, None]" has no attribute "__iter__" (not iterable)
for key in self.variables: # type: ignore [union-attr]
if key in b:
# there is a real binding for this key
newBind[key] = Value(key, b[key])
self.bindings.append(newBind)
except:
pass
self.askResult = False
try:
self.askResult = self.fullResult["boolean"]
except:
pass
def getValues(self, key: str) -> Optional[List[Value]]:
"""A shorthand for the retrieval of all bindings for a single key. It is
equivalent to ``[b[key] for b in self[key]]``
:param key: possible variable name.
:type key: string
:return: list of :class:`Value` instances.
:rtype: list
"""
try:
return [b[key] for b in self[key]]
except:
return []
def __contains__(self, key: Union[str, List[str], Tuple[str]]) -> bool:
"""Emulation of the "``key in obj``" operator. Key can be a string for a variable or an array/tuple
of strings.
If ``key`` is a variable, the return value is ``True`` if there is at least one binding where ``key`` is
bound. If ``key`` is an array or tuple, the return value is ``True`` if there is at least one binding
where *all* variables in ``key`` are bound.
:param key: possible variable, or array/tuple of variables
:return: whether there is a binding of the variable in the return
:rtype: Boolean
"""
if len(self.bindings) == 0:
return False
if type(key) is list or type(key) is tuple:
# check first whether they are all really variables
# type error: Unsupported right operand type for in ("Optional[List[str]]")
if False in [k in self.variables for k in key]: # type: ignore [operator]
return False
for b in self.bindings:
# try to find a binding where all key elements are present
if False in [k in b for k in key]:
# this is not a binding for the key combination, move on...
continue
else:
# yep, this one is good!
return True
return False
else:
# type error: Unsupported right operand type for in ("Optional[List[str]]")
if key not in self.variables: # type: ignore [operator]
return False
for b in self.bindings:
if key in b:
return True
return False
def __getitem__(self, key: Union[slice, str, List[str]]) -> List[Dict[str, Value]]:
"""Emulation of the ``obj[key]`` operator. Slice notation is also available.
The goal is to choose the right bindings among the available ones. The return values are always
arrays of bindings, ie, arrays of dictionaries mapping variable keys to :class:`Value` instances.
The different value settings mean the following:
- ``obj[key]`` returns the bindings where ``key`` has a valid value
- ``obj[key1,key2,...]`` returns the bindings where *all* ``key1,key2,...`` have valid values
- ``obj[(key1,key2,...):(nkey1,nkey2,...)]`` returns the bindings where all ``key1,key2,...`` have
valid values and *none* of the ``nkey1,nkey2,...`` have valid values
- ``obj[:(nkey1,nkey2,...)]`` returns the bindings where *none* of the ``nkey1,nkey2,...`` have valid values
In all cases complete bindings are returned, ie, the values for other variables, not present among
the keys in the call, may or may not be present depending on the query results.
:param key: possible variable or array/tuple of keys with possible slice notation
:return: list of bindings
:rtype: array of variable -> :class:`Value` dictionaries
"""
def _checkKeys(keys: Union[List[Any], Tuple[Any, ...]]) -> bool:
if len(keys) == 0:
return False
for k in keys:
# type error: Unsupported right operand type for in ("Optional[List[str]]")
if (
not isinstance(k, str)
or k not in self.variables # type: ignore [operator]
):
return False
return True
def _nonSliceCase(
key: Union[
str,
List[Any],
Tuple[Any],
]
) -> Union[List[Any], bool, Tuple[Any]]:
# type error: Unsupported right operand type for in ("Optional[List[str]]")
if isinstance(key, str) and key != "" and key in self.variables: # type: ignore[operator]
# unicode or string:
return [key]
elif type(key) is list or type(key) is tuple:
if _checkKeys(key):
return key
return False
# The arguments should be reduced to arrays of variables, ie, unicode strings
yes_keys: Union[List[Any], bool, Tuple[Any]] = []
no_keys: Union[List[Any], bool, Tuple[Any]] = []
if type(key) is slice:
# Note: None for start or stop is all right
if key.start:
yes_keys = _nonSliceCase(key.start)
if not yes_keys:
raise TypeError
if key.stop:
no_keys = _nonSliceCase(key.stop)
if not no_keys:
raise TypeError
else:
yes_keys = _nonSliceCase(key)
# got it right, now get the right binding line with the constraints
retval: List[Dict[str, Value]] = []
for b in self.bindings:
# first check whether the 'yes' part is all there:
# type error: Item "bool" of "Union[List[Any], bool, Tuple[Any]]" has no attribute "__iter__" (not iterable)
if False in [k in b for k in yes_keys]: # type: ignore[union-attr]
continue
# type error: Item "bool" of "Union[List[Any], bool, Tuple[Any]]" has no attribute "__iter__" (not iterable)
if True in [k in b for k in no_keys]: # type: ignore[union-attr]
continue
# if we got that far, we should be all right!
retval.append(b)
# if retval is of zero length, no hit; an exception should be raised to stay within the python style
if len(retval) == 0:
raise IndexError
return retval
def convert(self) -> "Bindings":
"""This is just a convenience method, returns ``self``.
Although :class:`SPARQLWrapper2.Bindings` is not a subclass of
:class:`SPARQLWrapper.QueryResult<SPARQLWrapper.Wrapper.QueryResult>`, it is returned as a result by
:func:`SPARQLWrapper2.query`, just like :class:`QueryResult<SPARQLWrapper.Wrapper.QueryResult>` is returned by
:func:`SPARQLWrapper.query()<SPARQLWrapper.Wrapper.SPARQLWrapper.query>`. Consequently,
having an empty :func:`convert` method to imitate
:class:`QueryResult's convert() method<SPARQLWrapper.Wrapper.QueryResult.convert>`
may avoid unnecessary problems.
"""
return self
##############################################################################################################
class SPARQLWrapper2(SW):
"""Subclass of :class:`~SPARQLWrapper.Wrapper.SPARQLWrapper` that works with a JSON SELECT return result only. The
query result is automatically set to a :class:`Bindings` instance. Makes the average query processing a bit
simpler..."""
def __init__(self, baseURI: str, defaultGraph: Optional[str] = None):
"""
Class encapsulating a full SPARQL call. In contrast to the :class:`~SPARQLWrapper.Wrapper.SPARQLWrapper`
superclass, the return format cannot be set (it is defaulted to
:attr:`~SPARQLWrapper.Wrapper.SPARQLWrapper.JSON`).
:param baseURI: string of the SPARQL endpoint's URI.
:type baseURI: string
:param defaultGraph: URI for the default graph. Default is ``None``, can be set via an explicit call, too.
:type defaultGraph: string
"""
super(SPARQLWrapper2, self).__init__(
baseURI, returnFormat=JSON, defaultGraph=defaultGraph
)
def setReturnFormat(self, format: Optional[str]) -> None:
"""
Set the return format (:meth:`overriding the inherited method
<SPARQLWrapper.Wrapper.SPARQLWrapper.setReturnFormat>`).
.. warning::
This method does nothing; this class instance should work with JSON only. The method is defined \
just to avoid possible errors by erroneously setting the return format. \
When using this class, the user can safely ignore this call.
:param format: return format
:type format: string
"""
pass
def query(self) -> Union[Bindings, QueryResult]: # type: ignore[override]
"""
Execute the query and do an automatic conversion.
Exceptions can be raised if either the URI is wrong or the HTTP server sends back an error.
The usual urllib2 exceptions are raised, which cover possible SPARQL errors, too.
If the query type is *not* SELECT, the method falls back to the
:meth:`corresponding method in the superclass<SPARQLWrapper.Wrapper.SPARQLWrapper.query>`.
:return: query result
:rtype: :class:`Bindings` instance
"""
res = super(SPARQLWrapper2, self).query()
if self.queryType == SELECT:
return Bindings(res)
else:
return res
def queryAndConvert( # type: ignore[override]
self,
) -> Union[Union[Bindings, QueryResult], QueryResult.ConvertResult]:
"""This is here to override the inherited method; it is equivalent to :class:`query`.
If the query type is *not* SELECT, the method falls back to the
:meth:`corresponding method in the superclass<SPARQLWrapper.Wrapper.SPARQLWrapper.queryAndConvert>`.
:return: the converted query result.
"""
if self.queryType == SELECT:
return self.query()
else:
return super(SPARQLWrapper2, self).queryAndConvert()
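Putting the classes above together, a small usage sketch of SPARQLWrapper2 and Bindings; the endpoint and query are illustrative (the DBpedia endpoint is assumed to predefine the rdfs prefix):

```python
from SPARQLWrapper import SPARQLWrapper2

sparql = SPARQLWrapper2("http://dbpedia.org/sparql")  # illustrative endpoint
sparql.setQuery(
    "SELECT ?label WHERE { <http://dbpedia.org/resource/Asturias> rdfs:label ?label } LIMIT 3"
)

result = sparql.query()           # Bindings instance for SELECT queries
if "label" in result:             # __contains__: at least one binding for ?label
    for row in result.bindings:   # list of {variable: Value} dictionaries
        print(row["label"].value, row["label"].lang)
```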

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,72 @@
# -*- coding: utf8 -*-
"""
**SPARQLWrapper** is a simple Python wrapper around a `SPARQL <https://www.w3.org/TR/sparql11-overview/>`_ service to
remotely execute your queries. It helps in creating the query
invocation and, possibly, converting the result into a more manageable
format.
"""
__version__ = "2.0.0"
"""The version of SPARQLWrapper"""
__agent__: str = f"sparqlwrapper {__version__} (rdflib.github.io/sparqlwrapper)"
from .SmartWrapper import SPARQLWrapper2
from .sparql_dataframe import get_sparql_dataframe
from .Wrapper import (
ASK,
BASIC,
CONSTRUCT,
CSV,
DELETE,
DESCRIBE,
DIGEST,
GET,
INSERT,
JSON,
JSONLD,
N3,
POST,
POSTDIRECTLY,
RDF,
RDFXML,
SELECT,
TSV,
TURTLE,
URLENCODED,
XML,
QueryResult,
SPARQLWrapper,
)
__all__ = [
"SPARQLWrapper2",
"get_sparql_dataframe",
"ASK",
"BASIC",
"CONSTRUCT",
"CSV",
"DELETE",
"DESCRIBE",
"DIGEST",
"GET",
"INSERT",
"JSON",
"JSONLD",
"N3",
"POST",
"POSTDIRECTLY",
"RDF",
"RDFXML",
"SELECT",
"TSV",
"TURTLE",
"URLENCODED",
"XML",
"QueryResult",
"SPARQLWrapper",
]

View File

@ -0,0 +1,157 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import json
import os
import shutil
import sys
import xml
from typing import List, Optional
import rdflib
from . import __version__
from .Wrapper import SPARQLWrapper, _allowedAuth, _allowedFormats, _allowedRequests
class SPARQLWrapperFormatter(
argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter
):
pass
def check_file(v: str) -> str:
if os.path.isfile(v):
return v
elif v == "-":
return "-" # stdin
else:
raise argparse.ArgumentTypeError("file '%s' is not found" % v)
def choicesDescriptions() -> str:
d = "\n - ".join(["allowed FORMAT:"] + _allowedFormats)
d += "\n - ".join(["\n\nallowed METHOD:"] + _allowedRequests)
d += "\n - ".join(["\n\nallowed AUTH:"] + _allowedAuth)
return d
def parse_args(test: Optional[List[str]] = None) -> argparse.Namespace:
"""Parse arguments."""
parser = argparse.ArgumentParser(
prog="rqw",
formatter_class=(
lambda prog: SPARQLWrapperFormatter(
prog,
**{
"width": shutil.get_terminal_size(fallback=(120, 50)).columns,
"max_help_position": 30,
},
)
),
description="sparqlwrapper CLI",
epilog=choicesDescriptions(),
)
input_group = parser.add_mutually_exclusive_group(required=True)
input_group.add_argument(
"-f",
"--file",
metavar="FILE",
type=check_file,
help="query with sparql file (stdin: -)",
)
input_group.add_argument("-Q", "--query", metavar="QUERY", help="query with string")
parser.add_argument(
"-F",
"--format",
default="json",
metavar="FORMAT",
choices=_allowedFormats,
help="response format",
)
parser.add_argument(
"-e",
"--endpoint",
metavar="URI",
help="sparql endpoint",
default="http://dbpedia.org/sparql",
)
parser.add_argument(
"-m",
"--method",
metavar="METHOD",
choices=_allowedRequests,
help="request method",
)
parser.add_argument(
"-a", "--auth", metavar="AUTH", choices=_allowedAuth, help="HTTP auth"
)
parser.add_argument(
"-u", "--username", metavar="ID", default="guest", help="username for auth"
)
parser.add_argument(
"-p", "--password", metavar="PW", default="", help="password for auth"
)
parser.add_argument("-q", "--quiet", action="store_true", help="supress warnings")
parser.add_argument(
"-V", "--version", action="version", version="%(prog)s {}".format(__version__)
)
if test is None:
return parser.parse_args()
else:
return parser.parse_args(test)
def main(test: Optional[List[str]] = None) -> None:
args = parse_args(test)
if args.quiet:
import warnings
warnings.filterwarnings("ignore")
q = ""
if args.query is not None:
q = args.query
elif args.file == "-":
q = sys.stdin.read()
else:
q = open(args.file, "r").read()
sparql = SPARQLWrapper(
args.endpoint,
agent=(
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/96.0.4664.110 Safari/537.36"
),
)
if args.auth is not None:
sparql.setHTTPAuth(args.auth)
sparql.setCredentials(args.username, args.password)
if args.method is not None:
sparql.setMethod(args.method)
sparql.setQuery(q)
sparql.setReturnFormat(args.format)
results = sparql.query().convert()
if isinstance(results, dict):
# "json"
print(json.dumps(results, indent=4))
elif isinstance(results, xml.dom.minidom.Document):
# "xml"
print(results.toxml())
elif isinstance(results, bytes):
# "csv", "tsv", "turtle", "n3"
print(results.decode("utf-8"))
elif isinstance(results, rdflib.graph.ConjunctiveGraph):
# "rdf"
print(results.serialize())
else:
# unknown type
raise TypeError(f"Unsupported result of type {type(results)}: {results!r}")
if __name__ == "__main__":
main()
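Because parse_args() accepts an optional argument list, the rqw entry point (see entry_points.txt above) can also be driven from Python for quick tests; a sketch with an illustrative endpoint and query (this performs a live HTTP request):

```python
from SPARQLWrapper.main import main

# Equivalent to: rqw -e http://dbpedia.org/sparql -F json -Q 'SELECT ...'
main([
    "-e", "http://dbpedia.org/sparql",
    "-F", "json",
    "-Q", "SELECT ?s WHERE { ?s ?p ?o } LIMIT 3",
])
```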

View File

@ -0,0 +1,74 @@
"""
Query a SPARQL endpoint and return results as a Pandas dataframe.
"""
import io
from typing import TYPE_CHECKING, Any, Dict, List, Union
from SPARQLWrapper.SmartWrapper import Bindings, SPARQLWrapper2, Value
from SPARQLWrapper.Wrapper import CSV, SELECT, SPARQLWrapper
if TYPE_CHECKING:
import pandas as pd
class QueryException(Exception):
pass
def get_sparql_dataframe_orig(
endpoint: str, query: Union[str, bytes]
) -> "pd.DataFrame":
"""copy paste from: https://github.com/lawlesst/sparql-dataframe"""
# pandas inside to avoid requiring it
import pandas as pd
sparql = SPARQLWrapper(endpoint)
sparql.setQuery(query)
if sparql.queryType != SELECT:
raise QueryException("Only SPARQL SELECT queries are supported.")
sparql.setReturnFormat(CSV)
results = sparql.query().convert()
if isinstance(results, bytes):
_csv = io.StringIO(results.decode("utf-8"))
return pd.read_csv(_csv, sep=",")
else:
raise TypeError(type(results))
def get_sparql_typed_dict(
endpoint: str, query: Union[str, bytes]
) -> List[Dict[str, Value]]:
"""modified from: https://github.com/lawlesst/sparql-dataframe"""
# pandas inside to avoid requiring it
import pandas as pd
# rdflib in here because there is some meta stuff in the setup.py and Travis fails because rdflib is installed later
import rdflib.term
sparql = SPARQLWrapper2(endpoint)
sparql.setQuery(query)
if sparql.queryType != SELECT:
raise QueryException("Only SPARQL SELECT queries are supported.")
# sparql.setReturnFormat(JSON)
results = sparql.query()
if not isinstance(results, Bindings):
raise TypeError(type(results))
# consider perf hacking later, probably slow
# convert list of dicts to python types
d = []
for x in results.bindings:
row = {}
for k in x:
v = x[k]
vv = rdflib.term.Literal(v.value, datatype=v.datatype).toPython() # type: ignore[no-untyped-call]
row[k] = vv
d.append(row)
return d
def get_sparql_dataframe(endpoint: str, query: Union[str, bytes]) -> "pd.DataFrame":
# pandas inside to avoid requiring it
import pandas as pd
d = get_sparql_typed_dict(endpoint, query)
# TODO: missing values are NaN-filled by pandas; make this stricter if there is a way of getting the NaN types from rdflib
df = pd.DataFrame(d)
return df
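A usage sketch for the helper above; pandas is imported lazily inside the function, so it only needs to be installed when this path is used. Endpoint and query are illustrative:

```python
from SPARQLWrapper import get_sparql_dataframe

df = get_sparql_dataframe(
    "http://dbpedia.org/sparql",
    "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10",
)
print(df.head())    # one column per SELECT variable
print(df.dtypes)    # values converted via rdflib.term.Literal(...).toPython()
```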

Some files were not shown because too many files have changed in this diff