75 lines
2.4 KiB
Python
75 lines
2.4 KiB
Python
"""
|
|
Query a SPARQL endpoint and return results as a Pandas dataframe.
|
|
"""
|
|
import io
|
|
from typing import TYPE_CHECKING, Any, Dict, List, Union
|
|
|
|
from SPARQLWrapper.SmartWrapper import Bindings, SPARQLWrapper2, Value
|
|
from SPARQLWrapper.Wrapper import CSV, SELECT, SPARQLWrapper
|
|
|
|
if TYPE_CHECKING:
|
|
import pandas as pd
|
|
|
|
|
|
class QueryException(Exception):
    """Raised by the query helpers in this module when a query is rejected.

    The helpers raise it when the supplied SPARQL query is not a SELECT query.
    """
|
|
|
|
|
|
def get_sparql_dataframe_orig(
    endpoint: str, query: Union[str, bytes]
) -> "pd.DataFrame":
    """Run a SELECT query against *endpoint* and load its CSV response into pandas.

    copy paste from: https://github.com/lawlesst/sparql-dataframe

    :param endpoint: address of the SPARQL endpoint handed to ``SPARQLWrapper``.
    :param query: the SPARQL query text; it must be a SELECT query.
    :return: the dataframe produced by ``pandas.read_csv`` from the response.
    :raises QueryException: if the query is not a SELECT query.
    :raises TypeError: if the converted response is not a ``bytes`` object.
    """
    # Deferred import: pandas is only needed once this function is called.
    import pandas as pd

    wrapper = SPARQLWrapper(endpoint)
    wrapper.setQuery(query)
    if wrapper.queryType != SELECT:
        raise QueryException("Only SPARQL SELECT queries are supported.")

    wrapper.setReturnFormat(CSV)
    payload = wrapper.query().convert()

    # Guard clause: anything other than raw bytes cannot be decoded below.
    if not isinstance(payload, bytes):
        raise TypeError(type(payload))

    csv_text = payload.decode("utf-8")
    return pd.read_csv(io.StringIO(csv_text), sep=",")
|
|
|
|
|
|
def get_sparql_typed_dict(
    endpoint: str, query: Union[str, bytes]
) -> List[Dict[str, Value]]:
    """Run a SELECT query and return one dict per result row with converted values.

    modified from: https://github.com/lawlesst/sparql-dataframe

    Each bound value is wrapped in an ``rdflib.term.Literal`` built from its
    ``value`` and ``datatype`` attributes and then converted with
    ``toPython()``.

    :param endpoint: address of the SPARQL endpoint handed to ``SPARQLWrapper2``.
    :param query: the SPARQL query text; it must be a SELECT query.
    :return: a list with one dict per binding, keyed by the binding's keys.
    :raises QueryException: if the query is not a SELECT query.
    :raises TypeError: if the query result is not a ``Bindings`` instance.
    """
    # NOTE(review): the dict values are the results of ``Literal.toPython()``,
    # not ``Value`` objects as the return annotation suggests - confirm and
    # consider widening the annotation to ``Dict[str, Any]``.

    # rdflib in here because there is some meta stuff in the setup.py and Travis fails because rdflib is installed later
    import rdflib.term

    sparql = SPARQLWrapper2(endpoint)
    sparql.setQuery(query)
    if sparql.queryType != SELECT:
        raise QueryException("Only SPARQL SELECT queries are supported.")

    # No return format is set here; presumably SPARQLWrapper2 picks its own
    # (JSON) format - verify against the SPARQLWrapper documentation.
    results = sparql.query()
    if not isinstance(results, Bindings):
        raise TypeError(type(results))

    # consider perf hacking later, probably slow
    # convert list of dicts to python types
    return [
        {
            name: rdflib.term.Literal(bound.value, datatype=bound.datatype).toPython()  # type: ignore[no-untyped-call]
            for name, bound in binding.items()
        }
        for binding in results.bindings
    ]
|
|
|
|
|
|
def get_sparql_dataframe(endpoint: str, query: Union[str, bytes]) -> "pd.DataFrame":
    """Run a SELECT query and return its converted rows as a pandas dataframe.

    The rows come from ``get_sparql_typed_dict`` and are handed directly to the
    ``pandas.DataFrame`` constructor.

    :param endpoint: address of the SPARQL endpoint, forwarded unchanged.
    :param query: the SPARQL query text, forwarded unchanged.
    :return: a dataframe built from the list of row dicts.
    """
    # Deferred import: pandas is only needed once this function is called.
    import pandas as pd

    # TODO: will nan fill somehow, make more strict if there is way of getting the nan types from rdflib
    return pd.DataFrame(get_sparql_typed_dict(endpoint, query))
|