Skip to content

Instantly share code, notes, and snippets.

@harej
Created July 14, 2016 19:13
Show Gist options
  • Save harej/1c29670a205a6bd279cd586204fdbfd5 to your computer and use it in GitHub Desktop.
Save harej/1c29670a205a6bd279cd586204fdbfd5 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
# Script overview: for every Wikidata item that carries a P932 statement (used
# below as a PubMed Central ID), ask NCBI E-utilities which PubMed records that
# article references, resolve each returned PubMed ID to a Wikidata item via
# P698, and print one tab-separated "<item>\tP2860\t<cited item>" line for each
# pair that is not already recorded as a P2860 statement on the source item.

# True: only scan items that have P859 = Q60346.
# False: scan items that do NOT have that statement.
# NOTE(review): presumably P859/Q60346 means "sponsor: NIOSH" -- confirm.
niosh_mode = False

# Seconds to wait on any single HTTP request before giving up, so that one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 120

# Prefix the query service puts in front of entity ids in its result URIs.
ENTITY_PREFIX = "http://www.wikidata.org/entity/"

if niosh_mode:
    # SELECT ?item WHERE { ?item wdt:P932 ?dummy0 . ?item wdt:P859 wd:Q60346 . }
    seed = "https://query.wikidata.org/sparql?format=json&query=SELECT%20%3Fitem%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP932%20%3Fdummy0%20.%0A%20%20%3Fitem%20wdt%3AP859%20wd%3AQ60346%20.%0A%7D"
else:
    # SELECT ?item WHERE { ?item wdt:P932 ?dummy0 . MINUS { ?item wdt:P859 wd:Q60346 } }
    seed = "https://query.wikidata.org/sparql?format=json&query=SELECT%20%3Fitem%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP932%20%3Fdummy0%20.%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP859%20wd%3AQ60346%20%7D%0A%7D"

# ELink request (PMC -> PubMed references) for one PMCID; {0} is the PMCID.
# Uses HTTPS so the request (which carries a contact e-mail) is not sent in
# clear text.
pmc_template = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pmc&linkname=pmc_refs_pubmed&id={0}&tool=niosh_analysis&email=jamesmhare@gmail.com"


def _fetch(url):
    """Return the response for a GET of *url*, failing loudly on HTTP errors.

    Raises ``requests.HTTPError`` for 4xx/5xx answers instead of letting an
    error page be parsed as if it were data, and applies REQUEST_TIMEOUT.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def _claim_values(claims, prop):
    """Return the datavalue payloads of the *prop* statements that have one.

    Statements whose main snak carries no ``datavalue`` key are skipped, and a
    property that is absent from *claims* yields an empty list.
    """
    return [
        claim["mainsnak"]["datavalue"]["value"]
        for claim in claims.get(prop, [])
        if "datavalue" in claim["mainsnak"]
    ]


get_list = _fetch(seed)
blob = get_list.json()
item_list = [
    x["item"]["value"].replace(ENTITY_PREFIX, "")
    for x in blob["results"]["bindings"]
]

# Maps each source item id to the item ids it already links to through P2860,
# so those pairs are not printed again.
do_not_generate = {}

for item in item_list:
    get_pmcid = _fetch(
        "https://www.wikidata.org/wiki/Special:EntityData/{0}.json".format(item)
    )
    pmcid_blob = get_pmcid.json()
    claims = pmcid_blob["entities"][item]["claims"]

    do_not_generate[item] = [
        "Q" + str(value["numeric-id"])
        for value in _claim_values(claims, "P2860")
    ]

    # Use the first P932 statement that actually holds a value; without one
    # there is nothing to send to E-utilities for this item.
    pmcids = _claim_values(claims, "P932")
    if not pmcids:
        continue
    pmcid = pmcids[0]

    # Build the lookup set once per item rather than scanning the list for
    # every query result.
    already_cited = set(do_not_generate[item])

    cites_list = BeautifulSoup(_fetch(pmc_template.format(pmcid)).text, "xml")
    links = cites_list.find_all("Link")
    for link in links:
        # Each <Link> element's <Id> is looked up as a P698 value below.
        pmid = link.Id.string
        # SELECT ?item WHERE { ?item wdt:P698 "<pmid>" . }
        get_item = _fetch(
            "https://query.wikidata.org/sparql?format=json&query=SELECT%20%3Fitem%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP698%20%22{0}%22%20.%0A%7D".format(pmid)
        ).json()
        for result in get_item["results"]["bindings"]:
            second_wd_item = result["item"]["value"].replace(ENTITY_PREFIX, "")
            if second_wd_item not in already_cited:
                print(item + "\tP2860\t" + second_wd_item)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment