Skip to content

Instantly share code, notes, and snippets.

@mgermain
Created June 26, 2017 18:22
Show Gist options
  • Save mgermain/63d206c82e2989f2be37ace4e6393439 to your computer and use it in GitHub Desktop.
Save mgermain/63d206c82e2989f2be37ace4e6393439 to your computer and use it in GitHub Desktop.
from __future__ import print_function
import re
import ijson.backends.yajl2 as ijson
# Map actor names to Wikidata entity ids.
#
# Reads one actor name per line from actors.txt, streams the Wikidata JSON
# dump entity by entity, and for every entity whose English label matches a
# known actor name (case-insensitively) prints the name and appends a
# "name,id" line to actor_qids.txt.

# Load the actor names into a set: the membership test below runs once per
# entity in the dump, so a constant-time lookup matters far more than the
# one-off cost of building the set.
with open("movies_rule_agent/actors.txt", "r") as f_actors:
    actors = {line.rstrip("\n").lower() for line in f_actors}

# NOTE(review): ijson's documentation recommends passing files opened in
# binary mode ("rb"); the original text mode is kept here -- confirm against
# the installed ijson version before changing it.
with open("movies_rule_agent/actor_qids.txt", "w") as f_actor_qids, \
        open('wikidata_json_dump/latest-all.json', "r") as f_wiki:
    # 'item' selects each element of the top-level JSON array, so entities
    # are yielded one at a time instead of loading the whole dump at once.
    for obj in ijson.items(f_wiki, 'item'):
        labels = obj['labels']
        # Entities without an English label cannot be matched by name.
        if "en" not in labels:
            continue
        actor_name = labels["en"]["value"].lower()
        if actor_name in actors:
            print(actor_name)
            f_actor_qids.write("{},{}\n".format(actor_name, obj['id']))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment