102 lines
3.6 KiB
Python
102 lines
3.6 KiB
Python
import pandas as pd
|
|
import requests
|
|
import time
|
|
import os
|
|
|
|
def match_gnd(token, delay=0.3):
    """Query the lobid GND search for a keyword and return its first hit.

    Args:
        token: Search term, sent as the ``q`` query parameter.
        delay: Seconds to sleep after every request (hit, miss or error)
            so that successive calls are throttled.

    Returns:
        A ``(preferredName, gndIdentifier)`` tuple taken from the first
        entry of the response's ``member`` list, or ``(None, None)`` when
        the status code is not 200, the list is missing/empty, or the
        request fails.
    """
    try:
        # Let requests build the query string so that tokens containing
        # '&', '#', '+', spaces or umlauts are percent-encoded correctly.
        resp = requests.get(
            "https://lobid.org/gnd/search",
            params={"q": token, "format": "json"},
            timeout=5,
        )
        if resp.status_code == 200:
            data = resp.json()
            members = data.get('member') if isinstance(data, dict) else None
            if members:
                first = members[0]
                return first.get('preferredName'), first.get('gndIdentifier')
    except Exception as e:
        # Deliberately broad: the lookup is best-effort and must never
        # abort the surrounding CSV processing.
        print(f"Fehler bei GND-Abfrage für '{token}': {e}")
    finally:
        # Runs on the early return as well, so hits are throttled too.
        time.sleep(delay)
    return None, None
|
|
|
|
def _scan_refs_for_row(row, level_cols):
    """Return the non-empty scan references of one CSV row as dicts."""
    refs = []
    for col in level_cols:
        value = row[col].strip()
        if value:
            refs.append({'level': col, 'scan_ref': value})
    return refs


def _first_gnd_match(description):
    """Look up each comma-separated token via match_gnd; return the first hit."""
    for token in (part.strip() for part in description.split(',')):
        if not token:
            continue
        name, gnd_id = match_gnd(token)
        if gnd_id:
            return name, gnd_id
    return None, None


def load_exlibris_refs(path):
    """Read the CSV and group its rows into one record per object.

    A row with a non-empty level-``0`` column starts a new object; rows
    without it only contribute their scan references to the object that
    was started last (such rows are ignored if no object exists yet).
    Objects without an ``Inventarnummer`` receive a running placeholder
    ``PL-0001``, ``PL-0002``, ...  The comma-separated tokens of
    ``Objektbeschreibung`` are looked up through ``match_gnd`` until the
    first one yields an identifier.

    Args:
        path: Path of the CSV file; all columns are read as strings.

    Returns:
        A ``pandas.DataFrame`` with one row per object, restricted to the
        known core fields that are actually present, plus
        ``ScanReferenzen`` (list of ``{'level', 'scan_ref'}`` dicts),
        ``GND_Name`` and ``GND_ID``.
    """
    df = pd.read_csv(path, dtype=str, header=0)

    # pandas names an empty header cell "Unnamed: <n>", never ''; accept
    # both spellings so the abbreviation column really becomes "Kürzel".
    if len(df.columns) and 'Kürzel' not in df.columns:
        first_col = df.columns[0]
        first_name = str(first_col).strip()
        if first_name == '' or first_name.startswith('Unnamed:'):
            df = df.rename(columns={first_col: 'Kürzel'})
    df = df.fillna('')

    # Scan-level columns, matched on the stripped header name.
    level_cols = [c for c in df.columns if c.strip() in ['0', '1', '2', '3', '4']]
    # Resolve the level-0 column the same way, so a header such as ' 0'
    # is treated consistently with level_cols.
    level_zero_col = next((c for c in level_cols if c.strip() == '0'), None)
    core_cols = [c for c in df.columns if c not in level_cols]

    obj_list = []
    current_obj = None
    placeholder_counter = 1

    for _, row in df.iterrows():
        row_refs = _scan_refs_for_row(row, level_cols)
        starts_object = (
            level_zero_col is not None and bool(row[level_zero_col].strip())
        )

        if not starts_object:
            # Continuation row: only adds scans to the running object.
            if current_obj is not None:
                current_obj['ScanReferenzen'].extend(row_refs)
            continue

        if current_obj is not None:
            obj_list.append(current_obj)

        core_data = {col: row[col] for col in core_cols}

        # Assign a placeholder when the inventory number is missing.
        if not core_data.get('Inventarnummer', '').strip():
            core_data['Inventarnummer'] = f'PL-{placeholder_counter:04d}'
            placeholder_counter += 1

        # GND lookup on the object description.
        obj_descr = core_data.get('Objektbeschreibung', '')
        gnd_name, gnd_id = _first_gnd_match(obj_descr) if obj_descr else (None, None)
        core_data['GND_Name'] = gnd_name
        core_data['GND_ID'] = gnd_id
        core_data['ScanReferenzen'] = row_refs
        current_obj = core_data

    if current_obj is not None:
        obj_list.append(current_obj)

    out_df = pd.DataFrame(obj_list)
    core_fields = ['Kürzel', 'Inventarnummer', 'Standort', 'Jahr', 'Urheber', 'Eigner',
                   'Objektbeschreibung', 'Material', 'Maße (in cm)',
                   'Objekttyp', 'Inschrift', 'Anmerkungen', 'ScanReferenzen',
                   'GND_Name', 'GND_ID']
    available = [c for c in core_fields if c in out_df.columns]
    return out_df[available]
|
|
|
|
# ====================
# Main script
# ====================
if __name__ == "__main__":
    output_file = "Testergebnis.csv"

    # Look for CSV files in the current directory. The script's own
    # output is excluded so a second run does not re-process it, and the
    # candidates are sorted because os.listdir() order is arbitrary.
    csv_files = sorted(
        f for f in os.listdir('.')
        if f.lower().endswith('.csv') and f.lower() != output_file.lower()
    )
    if not csv_files:
        print("Keine CSV-Datei im aktuellen Ordner gefunden.")
        # SystemExit instead of exit(): the latter is only a helper
        # injected by the site module and is not always available.
        raise SystemExit(1)

    # Use the first CSV in sorted order.
    input_csv = csv_files[0]
    print(f"Verwende CSV-Datei: {input_csv}")

    df = load_exlibris_refs(input_csv)

    # Save the prepared data.
    df.to_csv(output_file, index=False)
    print(f"Aufbereitete Daten gespeichert als {output_file}")
|