Last active
August 25, 2016 01:18
-
-
Save davidallenfox/002037f671b21e75af5a575bf91a6c62 to your computer and use it in GitHub Desktop.
Accepts a term, queries DBpedia for its page redirects, and returns the redirect labels (synonyms) as a single comma-separated string.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from SPARQLWrapper import SPARQLWrapper, JSON, XML, N3, RDF | |
# Characters percent-encoded before the term is spliced into a SPARQL <...> IRI.
# < > " { } | ^ ` \ are illegal inside a SPARQL IRI reference; # % ? [ ] would
# change how the IRI is interpreted.  The set is intended to follow DBpedia's
# own URI-encoding convention -- TODO confirm against the live endpoint.
_IRI_UNSAFE_CHARS = '"#%<>?[\\]^`{|}'


def _resource_name(term):
    """Return the local name of the DBpedia resource IRI for a stripped term."""
    # NOTE(review): capitalize() also lower-cases everything after the first
    # character ("Bill Clinton" -> "Bill clinton"), so mixed-case titles are
    # presumably only found when DBpedia has a redirect for the lower-cased
    # form.  Kept as-is because changing it alters which UNION branch matches.
    name = term.capitalize().replace(' ', '_')
    # Escape anything that could terminate or corrupt the <...> IRI, including
    # leftover whitespace/control characters (tabs, newlines).
    return ''.join(
        '%%%02X' % ord(ch) if ch in _IRI_UNSAFE_CHARS or ord(ch) <= 0x20 else ch
        for ch in name
    )


def dbpedia(term):
    """Look up the English labels of DBpedia redirects related to a term.

    The query collects labels of: the page the term redirects to, pages that
    redirect to that same target, pages that redirect to the term, and pages
    that redirect to those pages.

    Args:
        term: Search term, e.g. "Bill Clinton".  Surrounding whitespace is
            ignored and inner spaces become underscores.

    Returns:
        On success, the labels joined with ", " (UTF-8 encoded bytes under
        Python 2, text under Python 3).  The string "No results found" when
        the term is empty or the query matches nothing.  On a failed request,
        the tuple ("Problem communicating with the server: ", <error text>).
    """
    term = term.strip()
    if not term:
        # Nothing to look up; skip the network round-trip.
        return "No results found"

    query = """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?label
    WHERE
    {
    {
    <http://dbpedia.org/resource/VALUE> <http://dbpedia.org/ontology/wikiPageRedirects> ?x.
    ?x rdfs:label ?label.
    }
    UNION
    {
    <http://dbpedia.org/resource/VALUE> <http://dbpedia.org/ontology/wikiPageRedirects> ?y.
    ?x <http://dbpedia.org/ontology/wikiPageRedirects> ?y.
    ?x rdfs:label ?label.
    }
    UNION
    {
    ?x <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/VALUE>.
    ?x rdfs:label ?label.
    }
    UNION
    {
    ?y <http://dbpedia.org/ontology/wikiPageRedirects> <http://dbpedia.org/resource/VALUE>.
    ?x <http://dbpedia.org/ontology/wikiPageRedirects> ?y.
    ?x rdfs:label ?label.
    }
    FILTER (lang(?label) = 'en')
    }
    """
    nquery = query.replace('VALUE', _resource_name(term))

    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery(nquery)
    sparql.setReturnFormat(JSON)

    try:
        results = sparql.query().convert()
    except Exception as e:
        # Deliberately broad: any transport or parse failure is reported to
        # the caller as a message rather than raised.
        return "Problem communicating with the server: ", str(e)

    bindings = results["results"]["bindings"]
    if not bindings:
        return "No results found"

    # UNION without DISTINCT can yield the same label more than once; keep the
    # first occurrence of each label and preserve the endpoint's ordering.
    seen = set()
    rterms = []
    for binding in bindings:
        label = binding["label"]["value"]
        if label not in seen:
            seen.add(label)
            rterms.append(label)

    alts = ', '.join(rterms)
    if str is bytes:
        # Python 2 only: hand back UTF-8 bytes, as the original did, so that
        # printing non-ASCII labels does not depend on the terminal codec.
        alts = alts.encode('utf-8')
    return alts
if __name__ == "__main__":
    # Command-line entry point: look up the term given as the first argument
    # and print whatever dbpedia() returns.
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit('usage: python %s "<term>"' % sys.argv[0])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(dbpedia(sys.argv[1]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To test, invoke on command line:
python dbpedia_redirects.py "Bill Clinton"
This should return something like:
Bill Clinton, William Jefferson Blythe IV, William Jefferson Blythe III, 42nd President of the United States, Bil Clinton, BillClinton, Bill Blythe IV, Bill Clinton, Bill Clinton's Post-Presidency, Bill Clinton's Post Presidency, Bill J. Clinton, Bill Jefferson Clinton, Bill Klinton, Bill clinton, Billl Clinton, Billll Clinton, Billy Clinton, Bubba Clinton, Buddy (Clinton's dog), Bull Clinton, Clinton, Bill, Clinton Gore Administration, Clintonesque, Klin-ton, MTV President, MTV president, President Bill Clinton, President Clinton, The MTV President, US President Bill Clinton, Virginia Cassidy Blythe, Virginia Clinton, Virginia Clinton Kelly, Virginia Kelly, Willam Jefferson Blythe III, WilliamJeffersonClinton, William "Bill" Clinton, William Blythe III, William Clinton, William J. Blythe, William J. Blythe III, William J. Clinton, William J Clinton, William Jefferson "Bill" Clinton, William Jefferson (Bill) Clinton, William Jefferson Clinton, William clinton, Willy Clinton, Bill Clinton's sex scandals, @BillClinton