import os
import time

import pandas as pd
import requests

# Endpoint of the lobid-gnd search API queried by match_gnd().
GND_SEARCH_URL = "https://lobid.org/gnd/search"

# Header names (after stripping whitespace) that mark scan-level columns.
SCAN_LEVELS = ('0', '1', '2', '3', '4')

# File written by the script entry point; it is never picked up as input.
OUTPUT_FILE = "Testergebnis.csv"

# Output columns in their final order; only those present are emitted.
CORE_FIELDS = [
    'Kürzel', 'Inventarnummer', 'Standort', 'Jahr', 'Urheber', 'Eigner',
    'Objektbeschreibung', 'Material', 'Maße (in cm)',
    'Objekttyp', 'Inschrift', 'Anmerkungen', 'ScanReferenzen',
    'GND_Name', 'GND_ID',
]


def match_gnd(token: str, delay: float = 0.3) -> tuple:
    """Query the GND search API for a keyword and return the first hit.

    Args:
        token: Search term, sent as the ``q`` query parameter.
        delay: Seconds to sleep after the request, whatever its outcome,
            so that consecutive lookups are throttled.

    Returns:
        A ``(preferredName, gndIdentifier)`` tuple taken from the first
        entry of the response's ``member`` list, or ``(None, None)`` when
        the request fails, the status is not 200, or there is no usable hit.
    """
    try:
        # Let requests build the query string so that '&', '#', spaces and
        # umlauts inside the token are percent-encoded correctly.
        resp = requests.get(
            GND_SEARCH_URL,
            params={'q': token, 'format': 'json'},
            timeout=5,
        )
        if resp.status_code == 200:
            data = resp.json()
            members = data.get('member') if isinstance(data, dict) else None
            if isinstance(members, list) and members and isinstance(members[0], dict):
                first = members[0]
                return first.get('preferredName'), first.get('gndIdentifier')
    except (requests.RequestException, ValueError) as e:
        # Best effort: a failed lookup is reported but must not abort the run.
        print(f"Fehler bei GND-Abfrage für '{token}': {e}")
    finally:
        # Runs on every path, including the early return above.
        time.sleep(delay)
    return None, None


def _collect_scan_refs(row, level_cols):
    """Return the non-empty scan references of one CSV row, in column order."""
    refs = []
    for col in level_cols:
        value = row[col].strip()
        if value:
            refs.append({'level': col, 'scan_ref': value})
    return refs


def _lookup_gnd(description):
    """Return (name, id) of the first comma-separated token with a GND hit."""
    tokens = [t.strip() for t in description.split(',') if t.strip()]
    for token in tokens:
        name, gnd_id = match_gnd(token)
        if gnd_id:
            return name, gnd_id
    return None, None


def load_exlibris_refs(path):
    """Read the CSV and group its rows into one record per object.

    A row with a value in the level-``0`` column starts a new object; the
    rows that follow only contribute their scan references to that object.
    Rows that appear before the first object are ignored. Objects without an
    ``Inventarnummer`` receive a running placeholder (``PL-0001``, ...), and
    the comma-separated tokens of ``Objektbeschreibung`` are looked up via
    match_gnd() until one yields an identifier.

    Args:
        path: Path of the CSV file; its first line is the header.

    Returns:
        A DataFrame with one row per object, restricted to the columns of
        CORE_FIELDS that are present. ``ScanReferenzen`` holds a list of
        ``{'level': ..., 'scan_ref': ...}`` dicts.
    """
    df = pd.read_csv(path, dtype=str, header=0)

    # A blank first header cell means the column holds the "Kürzel".
    # pandas reports such a header as "Unnamed: 0", not as an empty string.
    first_col = df.columns[0]
    if (first_col.strip() == '' or first_col.startswith('Unnamed:')) \
            and 'Kürzel' not in df.columns:
        df.rename(columns={first_col: 'Kürzel'}, inplace=True)
    df.fillna('', inplace=True)

    level_cols = [c for c in df.columns if c.strip() in SCAN_LEVELS]
    # Find the level-0 column with the same whitespace tolerance used for
    # level_cols; an exact '0' header still takes precedence.
    if '0' in df.columns:
        zero_col = '0'
    else:
        zero_col = next((c for c in level_cols if c.strip() == '0'), None)

    obj_list = []
    current_obj = None
    placeholder_counter = 1

    for _, row in df.iterrows():
        row_refs = _collect_scan_refs(row, level_cols)
        starts_object = zero_col is not None and row[zero_col].strip() != ''

        if not starts_object:
            # Continuation row: attach its scans to the object in progress.
            if current_obj is not None:
                current_obj['ScanReferenzen'].extend(row_refs)
            continue

        if current_obj is not None:
            obj_list.append(current_obj)

        core_data = {col: row[col] for col in df.columns if col not in level_cols}

        # Assign a placeholder when the inventory number is missing or blank.
        if not core_data.get('Inventarnummer', '').strip():
            core_data['Inventarnummer'] = f'PL-{placeholder_counter:04d}'
            placeholder_counter += 1

        gnd_name, gnd_id = _lookup_gnd(core_data.get('Objektbeschreibung', ''))
        core_data['GND_Name'] = gnd_name
        core_data['GND_ID'] = gnd_id
        core_data['ScanReferenzen'] = row_refs
        current_obj = core_data

    if current_obj is not None:
        obj_list.append(current_obj)

    out_df = pd.DataFrame(obj_list)
    available = [c for c in CORE_FIELDS if c in out_df.columns]
    return out_df[available]


def main():
    """Process the first CSV file in the current directory into OUTPUT_FILE."""
    # Sort for a deterministic choice and skip the file this script writes,
    # so that a second run does not consume its own output.
    csv_files = sorted(
        f for f in os.listdir('.')
        if f.lower().endswith('.csv') and f.lower() != OUTPUT_FILE.lower()
    )
    if not csv_files:
        print("Keine CSV-Datei im aktuellen Ordner gefunden.")
        raise SystemExit(1)

    input_csv = csv_files[0]
    print(f"Verwende CSV-Datei: {input_csv}")

    df = load_exlibris_refs(input_csv)

    df.to_csv(OUTPUT_FILE, index=False)
    print(f"Aufbereitete Daten gespeichert als {OUTPUT_FILE}")


# ====================
# Script entry point
# ====================
if __name__ == "__main__":
    main()