Skip to content

Instantly share code, notes, and snippets.

@rosalieper
Created June 9, 2021 09:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rosalieper/83354c6677c92a008f412b21d7d41575 to your computer and use it in GitHub Desktop.
Save rosalieper/83354c6677c92a008f412b21d7d41575 to your computer and use it in GitHub Desktop.
import sys
import json
import os
import bz2
import gzip
def read_dump(path):
    """Yield the JSON value found on each line of a dump file.

    The container format is chosen from the file name suffix: ``.gz`` (gzip),
    ``.bz2`` (bzip2) or ``.json`` (uncompressed). Every format is read as
    UTF-8 text, so the result does not depend on the platform's default
    encoding.

    Each line is stripped of surrounding whitespace and of leading/trailing
    commas before being parsed. Lines that are not valid JSON on their own
    (for example a lone ``[`` or ``]``) are silently skipped.

    Args:
        path: Path of the dump file to read.

    Yields:
        The decoded JSON value of each parseable line, in file order.

    Raises:
        NotImplementedError: If the file name has none of the supported
            suffixes. Because this is a generator, the error surfaces when
            iteration starts, not when the function is called.
    """
    file_ = os.path.split(path)[-1]
    if file_.endswith('.gz'):
        opener = gzip.open
    elif file_.endswith('.bz2'):
        opener = bz2.open
    elif file_.endswith('.json'):
        opener = open
    else:
        raise NotImplementedError(f'Reading file {file_} is not supported')

    # All three openers accept the same text-mode arguments, so decoding is
    # done once by the file object instead of per line.
    with opener(path, 'rt', encoding='utf-8') as f:
        for line in f:
            # Keep the try body minimal so that only parse failures of this
            # line are swallowed, never exceptions raised by the consumer.
            try:
                entity = json.loads(line.strip().strip(','))
            except json.JSONDecodeError:
                continue
            yield entity
def find_bad_precisions(items, limit=365.0):
    """Yield ``(id, precision)`` for globe-coordinate claims with a large precision.

    Each item is expected to be a mapping with an ``'id'`` entry and a
    ``'claims'`` entry that maps property ids to lists of claims
    (presumably Wikidata entity JSON -- confirm against the dump format).
    A claim is reported when its main snak has datatype
    ``'globe-coordinate'`` and the absolute value of its precision is at
    least ``limit``.

    Args:
        items: Iterable of entity mappings.
        limit: Smallest absolute precision that is reported.

    Yields:
        Tuples ``(item['id'], precision)``, one per offending claim.
    """
    for item in items:
        claims = item['claims']
        # Iterate by key rather than ``.values()`` so that an empty list in
        # place of the mapping is simply skipped.
        for pid in claims:
            for claim in claims[pid]:
                mainsnak = claim['mainsnak']
                if mainsnak['datatype'] != 'globe-coordinate':
                    continue
                try:
                    precision = mainsnak['datavalue']['value']['precision']
                except (KeyError, TypeError):
                    # The snak carries no usable coordinate value; skip it.
                    continue
                if precision is not None and abs(precision) >= limit:
                    yield item['id'], precision


def main(argv=None):
    """Scan the dump named on the command line and write ``badprecisions.txt``.

    The output file is truncated first, then receives one
    ``<id>\\t<precision>`` line per offending claim.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            path of the dump file.

    Raises:
        SystemExit: If no dump file path was given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        raise SystemExit('usage: python <script> DUMP_FILE')
    # Open the report once; line buffering keeps every hit flushed to disk
    # as soon as it is found, like the per-hit append it replaces.
    with open('badprecisions.txt', 'w', buffering=1) as out:
        for entity_id, precision in find_bad_precisions(read_dump(argv[1])):
            out.write(entity_id + '\t' + str(precision) + '\n')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment