Skip to content

Instantly share code, notes, and snippets.

@Ladsgroup
Created March 26, 2021 05:51
Show Gist options
  • Save Ladsgroup/b870447728f251605dadf0e688e9e95e to your computer and use it in GitHub Desktop.
Save Ladsgroup/b870447728f251605dadf0e688e9e95e to your computer and use it in GitHub Desktop.
import sys
import json
import os
import bz2
import gzip
def read_dump(path):
    """Yield every JSON document found on its own line of a dump file.

    The file is read line by line. Surrounding whitespace and commas are
    stripped from each line before it is parsed, and lines that are not
    valid JSON after stripping (for example a lone ``[`` or ``]``, or a
    blank line) are skipped silently.

    Args:
        path: Path to the dump. The file name must end in ``.gz``,
            ``.bz2`` or ``.json``; the suffix selects how it is opened.

    Yields:
        The object decoded from each parseable line.

    Raises:
        NotImplementedError: If the file name has none of the supported
            suffixes. Because this is a generator, the error surfaces on
            the first iteration rather than at call time.
    """
    file_ = os.path.split(path)[-1]
    if file_.endswith('.gz'):
        opener = gzip.open
    elif file_.endswith('.bz2'):
        opener = bz2.open
    elif file_.endswith('.json'):
        opener = open
    else:
        raise NotImplementedError(f'Reading file {file_} is not supported')

    # Text mode with an explicit encoding makes all three formats decode
    # as UTF-8 regardless of the platform's locale, and the context
    # manager closes the handle even if the consumer stops iterating early.
    with opener(path, 'rt', encoding='utf-8') as f:
        for line in f:
            try:
                entity = json.loads(line.strip().strip(','))
            except json.JSONDecodeError:
                continue
            yield entity
def _entity_ids(claims):
    """Return the ``mainsnak.datavalue.value.id`` of each claim that has one.

    Claims missing any key along that path, or whose value is not a
    mapping, are skipped.
    """
    ids = []
    for claim in claims:
        try:
            ids.append(claim['mainsnak']['datavalue']['value']['id'])
        except (KeyError, TypeError):
            # Only the "path not present" failures are expected here; a bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            continue
    return ids


# Write one "<item> <target>" pair per line to P279.txt for every item in the
# dump (path given as the first command-line argument) that has both P31 and
# P279 claims. The leading character of each id is dropped on output.
# The output file is opened once for the whole run instead of being truncated
# and then reopened in append mode for every item.
with open('P279.txt', 'w') as f:
    for item in read_dump(sys.argv[1]):
        claims = item.get('claims', {})
        if 'P279' not in claims or 'P31' not in claims:
            continue
        numeric_id = item['id'][1:]
        # All P31 targets are written first, then the P279 targets.
        for target in _entity_ids(claims['P31']):
            f.write(numeric_id + ' ' + target[1:] + '\n')
        for target in _entity_ids(claims['P279']):
            f.write(numeric_id + ' ' + target[1:] + '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment