356 lines
12 KiB
Python
356 lines
12 KiB
Python
"""
|
||
===============================================================================
|
||
Skriptname: NV_SPOT_Export.py
|
||
Beschreibung:
|
||
Dieses Skript soll hierarchische Normvokabular-Tabellen
|
||
(ODS/XLSX-Format) in eine JSON-basierte SPOT-Struktur (Strukturierter
|
||
Positionsbaum) konvertieren. Es ermöglicht das Exportieren in Excel und ODS, sowie
|
||
das nachträgliche Ergänzen von Kategorien, Unterkategorien und Wörtern.
|
||
|
||
//NOCH NICHT GETESTET//
|
||
|
||
Hauptfunktionen:
|
||
- Node: Klasse zur Repräsentation von Baumknoten.
|
||
- load_excel_or_ods: Lädt Tabellen aus ODS/XLSX-Dateien.
|
||
- process_sheet_to_tree: Erzeugt eine Baumstruktur aus einem Sheet.
|
||
- save_spot_json: Speichert den SPOT-Baum als JSON.
|
||
- load_spot_json: Lädt SPOT-Daten aus JSON-Dateien.
|
||
- export_spot_to_excel: Exportiert den SPOT-Baum nach Excel.
|
||
- export_spot_to_ods: Exportiert den SPOT-Baum nach ODS.
|
||
- add_category/subcategory/word: Fügt Elemente im Baum hinzu.
|
||
- main: Steuert den Workflow.
|
||
|
||
Abhängigkeiten:
|
||
Python 3.x, pandas, openpyxl, ezodf, json, logging, datetime
|
||
|
||
Stand: 2025-10-01
|
||
===============================================================================
|
||
"""
|
||
|
||
import os
|
||
import json
|
||
import datetime
|
||
import pandas as pd
|
||
import ezodf
|
||
from openpyxl import Workbook
|
||
from openpyxl.utils import get_column_letter
|
||
from openpyxl.styles import Alignment
|
||
import logging
|
||
|
||
# Configure the root logger at import time: INFO level, "time - level - message" lines.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
||
|
||
# ---------------- SPOT-Baumstruktur ----------------
|
||
class Node:
    """A single node of the SPOT tree.

    Attributes:
        name: Display name of the node.
        id: Optional identifier; the rest of this file only sets it on
            category nodes.
        type: Node kind, one of "category", "subcategory" or "word".
        children: Child nodes in insertion order.
    """

    def __init__(self, name, node_type="category", id=None):
        # `id` shadows the builtin, but callers pass it by keyword, so the
        # parameter name is part of the interface and has to stay.
        self.name = name
        self.id = id
        self.type = node_type
        self.children = []

    def __repr__(self):
        """Return a short debugging representation (children are only counted)."""
        return (
            f"Node(name={self.name!r}, node_type={self.type!r}, "
            f"id={self.id!r}, children={len(self.children)})"
        )

    def add_child(self, child):
        """Append ``child`` to this node's children."""
        self.children.append(child)

    def to_dict(self):
        """Serialize this node (recursively) into JSON-compatible data.

        Word nodes collapse to their bare name string; every other node
        becomes a dict with the keys "id", "name", "type" and "children".
        """
        if self.type == "word":
            return self.name
        return {
            "id": self.id,
            "name": self.name,
            "type": self.type,
            "children": [child.to_dict() for child in self.children],
        }

    @staticmethod
    def from_dict(d):
        """Rebuild a node (recursively) from the output of ``to_dict``.

        Args:
            d: Either a plain string (interpreted as a word node) or a dict
                with a mandatory "name" key and optional "type", "id" and
                "children" keys.

        Returns:
            The reconstructed ``Node``.

        Raises:
            KeyError: If a dict entry has no "name" key.
        """
        if isinstance(d, str):
            return Node(d, "word")
        node = Node(d["name"], d.get("type", "category"), d.get("id"))
        # A JSON file may carry `"children": null`; treat that like a missing
        # key instead of failing while iterating over None.
        node.children = [Node.from_dict(child) for child in d.get("children") or []]
        return node
|
||
|
||
|
||
# ---------------- Funktionen zum Laden ----------------
|
||
def load_excel_or_ods(input_file, master_sheet="Masterstruktur"):
    """Load a spreadsheet and split it into the master sheet and all others.

    The pandas engine is chosen from the file extension: ".xlsx"/".xlsm" use
    openpyxl, legacy ".xls" uses xlrd (openpyxl cannot read that format), and
    every other extension is read with the ODF engine.

    Args:
        input_file (str): Path to the source workbook.
        master_sheet (str): Name of the master sheet.

    Returns:
        tuple: ``(master_df, dfs)`` where ``master_df`` is the DataFrame of
        the master sheet and ``dfs`` maps every other sheet name to its
        DataFrame.

    Raises:
        Whatever pandas raises when the file cannot be opened or when
        ``master_sheet`` does not exist in the workbook.
    """
    ext = os.path.splitext(input_file)[1].lower()
    if ext in (".xlsx", ".xlsm"):
        engine = "openpyxl"
    elif ext == ".xls":
        engine = "xlrd"
    else:
        engine = "odf"

    # The context manager closes the underlying file handle even when a
    # sheet fails to parse.
    with pd.ExcelFile(input_file, engine=engine) as xls:
        dfs = {
            name: xls.parse(sheet_name=name)
            for name in xls.sheet_names
            if name != master_sheet
        }
        master_df = xls.parse(sheet_name=master_sheet)
    return master_df, dfs
|
||
|
||
|
||
# ---------------- Baum aus Sheet erstellen ----------------
|
||
def process_sheet_to_tree(df):
    """Convert one category sheet into a list of top-level tree nodes.

    Rows are read top to bottom and each row contributes at most one node,
    decided by the first non-empty cell in this order:

    1. "ID" set: start a new category, named after "Unterkategorie" or,
       if that is empty, "Wort/Vokabel".
    2. "Unterunterkategorie" set: start a subcategory below the current
       category.
    3. "Wort/Vokabel" set: add a word to the current subcategory, or to the
       current category when no subcategory is open.

    Missing columns are treated as empty. Subcategory or word rows that
    appear before the first category cannot be attached anywhere; they are
    skipped with a warning instead of disappearing silently.

    Args:
        df (pd.DataFrame): Sheet data with the columns "ID",
            "Unterkategorie", "Unterunterkategorie" and "Wort/Vokabel".

    Returns:
        list[Node]: The category nodes in sheet order.
    """
    table = df.fillna("").astype(str)
    tree_nodes = []
    current_cat = None
    current_sub = None

    for row_label, row in table.iterrows():
        id_val = row.get("ID", "").strip()
        uk_val = row.get("Unterkategorie", "").strip()
        uuk_val = row.get("Unterunterkategorie", "").strip()
        word_val = row.get("Wort/Vokabel", "").strip()

        if id_val:
            # New category level; any open subcategory ends here.
            current_cat = Node(uk_val or word_val, "category", id=id_val)
            tree_nodes.append(current_cat)
            current_sub = None
        elif uuk_val:
            if current_cat is None:
                logging.warning(
                    f"Zeile {row_label}: Unterunterkategorie '{uuk_val}' ohne Kategorie übersprungen"
                )
                continue
            current_sub = Node(uuk_val, "subcategory")
            current_cat.add_child(current_sub)
        elif word_val:
            parent = current_sub if current_sub is not None else current_cat
            if parent is None:
                logging.warning(
                    f"Zeile {row_label}: Wort '{word_val}' ohne Kategorie übersprungen"
                )
                continue
            parent.add_child(Node(word_val, "word"))
    return tree_nodes
|
||
|
||
|
||
# ---------------- SPOT laden/speichern ----------------
|
||
def save_spot_json(tree_nodes, file_path):
    """Write the SPOT tree to ``file_path`` as UTF-8 encoded JSON.

    Each root node is serialized through its ``to_dict`` method; the result
    is dumped as a JSON list with two-space indentation and non-ASCII
    characters kept as-is.

    Args:
        tree_nodes (list[Node]): Root nodes of the tree.
        file_path (str): Destination path.
    """
    with open(file_path, mode="w", encoding="utf-8") as out:
        serialised = []
        for root in tree_nodes:
            serialised.append(root.to_dict())
        json.dump(serialised, out, ensure_ascii=False, indent=2)
    logging.info(f"SPOT gespeichert: {file_path}")
|
||
|
||
|
||
def load_spot_json(file_path):
    """Read a SPOT JSON file and rebuild the tree from it.

    Args:
        file_path (str): Path to the JSON file; it is read as UTF-8.

    Returns:
        list[Node]: One node per entry of the JSON document, each rebuilt
        with ``Node.from_dict``.
    """
    with open(file_path, mode="r", encoding="utf-8") as src:
        raw_entries = json.load(src)

    roots = []
    for entry in raw_entries:
        roots.append(Node.from_dict(entry))
    return roots
|
||
|
||
|
||
# ---------------- Export in Excel ----------------
|
||
def export_spot_to_excel(tree_nodes, output_file):
    """Export the SPOT tree to an Excel workbook, one sheet per root node.

    Sheet layout (columns A-D):
        row 1:  header "ID", "Unterkategorie", "Unterunterkategorie",
                "Wort/Vokabel" - the same header the ODS export writes and
                the same column names ``process_sheet_to_tree`` reads
        row 2:  category ID (A) and category name (B)
        then:   one row per subcategory (C), each followed by one row per
                word (D); words attached directly to the category also go
                into column D.

    Sheet titles are derived from the node name: characters that openpyxl
    rejects in titles are replaced by "_" and the result is cut to 31
    characters. When the tree is empty the default sheet is kept, because a
    workbook without any sheet cannot be saved.

    Args:
        tree_nodes (list[Node]): Root nodes of the tree.
        output_file (str): Destination path of the Excel file.
    """
    headers = ("ID", "Unterkategorie", "Unterunterkategorie", "Wort/Vokabel")
    forbidden = str.maketrans({ch: "_" for ch in "\\/*?:[]"})
    left = Alignment(horizontal="left")  # one shared style object for all cells

    wb = Workbook()
    if tree_nodes:
        # Only drop the default sheet when real sheets will replace it.
        wb.remove(wb.active)

    for node in tree_nodes:
        title = str(node.name).translate(forbidden)[:31] or "Sheet"
        ws = wb.create_sheet(title=title)

        for col_idx, header in enumerate(headers, 1):
            ws.cell(row=1, column=col_idx, value=header)

        # Category row
        ws.cell(row=2, column=1, value=node.id)
        ws.cell(row=2, column=2, value=node.name)
        row_idx = 3

        for sub in node.children:
            if sub.type == "subcategory":
                ws.cell(row=row_idx, column=3, value=sub.name)
                row_idx += 1
                for word in sub.children:
                    ws.cell(row=row_idx, column=4, value=word.name)
                    row_idx += 1
            elif sub.type == "word":
                ws.cell(row=row_idx, column=4, value=sub.name)
                row_idx += 1

        # Column widths and alignment for every written row
        for col_idx, col_letter in enumerate("ABCD", 1):
            ws.column_dimensions[col_letter].width = 20
            for r in range(1, row_idx):
                ws.cell(row=r, column=col_idx).alignment = left

    wb.save(output_file)
    logging.info(f"Excel exportiert: {output_file}")
|
||
|
||
|
||
# ---------------- Export in ODS ----------------
|
||
def export_spot_to_ods(tree_nodes, output_file):
    """Export the SPOT tree to an ODS document, one sheet per root node.

    Sheet layout (columns 0-3):
        row 0:  header "ID", "Unterkategorie", "Unterunterkategorie",
                "Wort/Vokabel"
        row 1:  category ID and category name
        then:   one row per subcategory (column 2), each followed by one row
                per word (column 3); words attached directly to the category
                also go into column 3.

    The sheet is sized from the number of rows actually written. Sizing it
    from ``len(node.children)`` alone ignores the words nested below
    subcategories and makes larger categories index past the end of the
    sheet.

    Args:
        tree_nodes (list[Node]): Root nodes of the tree.
        output_file (str): Destination path of the ODS file.
    """
    headers = ("ID", "Unterkategorie", "Unterunterkategorie", "Wort/Vokabel")
    doc = ezodf.newdoc(doctype="ods", filename=output_file)

    for node in tree_nodes:
        # Flatten the children first so the required row count is known
        # before the sheet is created: (column index, cell text) per row.
        body_rows = []
        for child in node.children:
            if child.type == "subcategory":
                body_rows.append((2, child.name))
                body_rows.extend((3, word.name) for word in child.children)
            elif child.type == "word":
                body_rows.append((3, child.name))

        # Header row + category row + one row per flattened entry.
        sheet = ezodf.Sheet(node.name[:31], size=(len(body_rows) + 2, len(headers)))
        doc.sheets += sheet

        for col_idx, header in enumerate(headers):
            sheet[0, col_idx].set_value(header)

        # NOTE(review): ezodf's set_value is assumed not to accept None -
        # confirm. Skipping leaves the ID cell empty either way.
        if node.id is not None:
            sheet[1, 0].set_value(node.id)
        sheet[1, 1].set_value(node.name)

        for row_idx, (col_idx, text) in enumerate(body_rows, start=2):
            sheet[row_idx, col_idx].set_value(text)

    doc.save()
    logging.info(f"ODS exportiert: {output_file}")
|
||
|
||
|
||
# ---------------- CLI-Funktionen zum Editieren ----------------
|
||
def add_category(tree_nodes, cat_id, cat_name):
    """Append a new, empty category node to the list of root nodes.

    Args:
        tree_nodes (list[Node]): Root nodes; modified in place.
        cat_id (str): ID of the new category.
        cat_name (str): Name of the new category.
    """
    new_category = Node(name=cat_name, node_type="category", id=cat_id)
    tree_nodes.append(new_category)
    logging.info(f"Kategorie hinzugefügt: {cat_id} {cat_name}")
|
||
|
||
|
||
def add_subcategory(tree_nodes, cat_id, sub_name):
    """Add a subcategory to the first root node whose ID equals ``cat_id``.

    When no root node has that ID nothing is added and a warning is logged,
    so a mistyped ID does not go unnoticed.

    Args:
        tree_nodes (list[Node]): Root nodes; the matching one is modified in
            place.
        cat_id (str): ID of the target category.
        sub_name (str): Name of the new subcategory.
    """
    for cat in tree_nodes:
        if cat.id == cat_id:
            cat.add_child(Node(sub_name, "subcategory"))
            logging.info(f"Unterkategorie hinzugefügt: {sub_name} in {cat_id}")
            return
    logging.warning(
        f"Unterkategorie nicht hinzugefügt: Kategorie {cat_id} nicht gefunden ({sub_name})"
    )
|
||
|
||
|
||
def add_word(tree_nodes, cat_id, sub_name, word_name):
    """Add a word to a named subcategory of the category with ID ``cat_id``.

    Only children of type "subcategory" are considered as targets. Matching
    by name alone could pick a word node that happens to carry the same
    name, and children of word nodes are dropped by ``Node.to_dict``.

    When no matching subcategory exists nothing is added and a warning is
    logged.

    Args:
        tree_nodes (list[Node]): Root nodes; the matching subtree is
            modified in place.
        cat_id (str): ID of the category.
        sub_name (str): Name of the subcategory inside that category.
        word_name (str): The word to add.
    """
    for cat in tree_nodes:
        if cat.id != cat_id:
            continue
        for sub in cat.children:
            if sub.type == "subcategory" and sub.name == sub_name:
                sub.add_child(Node(word_name, "word"))
                logging.info(f"Wort hinzugefügt: {word_name} unter {sub_name}")
                return
    logging.warning(
        f"Wort nicht hinzugefügt: Unterkategorie {sub_name} in Kategorie {cat_id} nicht gefunden ({word_name})"
    )
|
||
|
||
|
||
# ---------------- HAUPTPROGRAMM ----------------
|
||
def main():
    """Run the complete SPOT workflow with fixed file names.

    Steps:
        1. Read the master workbook (ODS or XLSX).
        2. Convert every non-master sheet into SPOT tree nodes.
        3. Save the tree as JSON.
        4. Export the tree to Excel and ODS, with today's date in the
           file names.
        5. Optional: edit the tree with the add_* helpers (see the disabled
           example below).
    """
    source_path = "NV_MASTER.ods"
    json_path = "nv_spot.json"
    stamp = datetime.datetime.today().strftime("%y.%m.%d")
    excel_path = f"NV_MASTER_SPOT_{stamp}.xlsx"
    ods_path = f"NV_MASTER_SPOT_{stamp}.ods"

    # The master sheet itself is loaded but not used by this workflow.
    _master_df, category_sheets = load_excel_or_ods(source_path)
    spot_tree = [
        node
        for sheet_df in category_sheets.values()
        for node in process_sheet_to_tree(sheet_df)
    ]

    save_spot_json(spot_tree, json_path)

    # Example use of the editing helpers:
    # add_category(spot_tree, "10.1", "Neue Kategorie")
    # add_subcategory(spot_tree, "10.1", "Neue Unterunterkategorie")
    # add_word(spot_tree, "10.1", "Neue Unterunterkategorie", "Neues Wort")

    export_spot_to_excel(spot_tree, excel_path)
    export_spot_to_ods(spot_tree, ods_path)
    logging.info("SPOT-Workflow abgeschlossen.")


if __name__ == "__main__":
    main()
|