# Skip to content
#
# Instantly share code, notes, and snippets.
#
# Embed
# What would you like to do?
# DBPedia
from SPARQLWrapper import SPARQLWrapper, JSON
import html, datetime, os, json, glob, time
def send_sparql_query(q, timeout=None, sleep=0):
    """Run a SPARQL query against the public DBpedia endpoint, with retries.

    Each attempt first sleeps for ``sleep`` seconds, then sends ``q`` to
    ``http://dbpedia.org/sparql`` asking for JSON.  When an attempt fails the
    error is printed and the delay grows (0 -> 1, then doubling) before the
    next attempt.  Retrying is done in a loop rather than by recursion so a
    long outage cannot grow the call stack.

    Args:
        q: SPARQL query text.
        timeout: Passed to ``SPARQLWrapper.setTimeout`` when truthy;
            otherwise the wrapper's own default applies.
        sleep: Seconds to wait before the first attempt; also the starting
            point of the back-off.

    Returns:
        Whatever ``sparql.query().convert()`` yields for a JSON return
        format (callers in this file index it like a dict).

    Raises:
        Exception: the error from the last attempt, once an attempt made
            with a delay of 180 seconds or more has also failed.
    """
    max_backoff = 180  # seconds; no further retry once the delay is this large
    while True:
        time.sleep(sleep)
        try:
            sparql = SPARQLWrapper("http://dbpedia.org/sparql")
            sparql.setReturnFormat(JSON)
            if timeout:
                sparql.setTimeout(timeout)
            sparql.setQuery(q)
            return sparql.query().convert()
        except Exception as exc:  # retry boundary: any failure is retried
            print(' * query failed', exc)
            if sleep >= max_backoff:
                # Out of retries: surface the real error instead of handing
                # the caller a silent None.
                raise
            sleep = sleep * 2 if sleep else 1
def get_bulk_dbpedia_metadata(limit=1000, offset=0):
    """Fetch one page of ``dbo:Person`` records from DBpedia.

    The query selects persons with an English ``dbp:name`` and optionally
    their birth date, death date, thumbnail and English abstract.

    Args:
        limit: Value placed after ``LIMIT`` in the query (page size).
        offset: Value placed after ``OFFSET`` in the query.

    Returns:
        A list with one dict per result binding, mapping each variable
        present in that binding to its ``'value'`` field.
    """
    # The literal is kept flush-left so the query text sent to the endpoint
    # is exactly the text shown in the original.
    select_clause = '''
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
SELECT * WHERE {
?person a dbo:Person; dbp:name
?name FILTER (lang(?name) = 'en')
OPTIONAL { ?person dbo:birthDate ?birth_date }
OPTIONAL { ?person dbo:deathDate ?death_date }
OPTIONAL { ?person dbo:thumbnail ?thumbnail }
OPTIONAL { ?person dbo:abstract ?abstract FILTER (lang(?abstract) = 'en') }
}'''
    query = f'{select_clause} LIMIT {limit!s} OFFSET {offset!s}'
    response = send_sparql_query(query)
    rows = []
    for binding in response['results']['bindings']:
        # Flatten {"var": {"value": ...}} down to {"var": ...}.
        rows.append({field: binding[field]['value'] for field in binding})
    return rows
# Download 2500 consecutive pages of `limit` rows each and collect every
# returned row in the module-level list `l`.
limit = 1000
l = []
for i in range(2500):
    print(' * fetching page', i)
    l.extend(get_bulk_dbpedia_metadata(limit=limit, offset=i * limit))
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment