Created
June 9, 2021 09:21
-
-
Save rosalieper/83354c6677c92a008f412b21d7d41575 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import json | |
import os | |
import bz2 | |
import gzip | |
def read_dump(path):
    """Yield the JSON value stored on each line of a dump file.

    The file may be plain (``.json``) or compressed (``.gz`` / ``.bz2``); the
    format is chosen from the file-name extension.  Every line is stripped of
    surrounding whitespace and of leading/trailing commas before parsing.
    Lines that are not valid JSON on their own (for instance the ``[`` and
    ``]`` lines that wrap a JSON array spread over several lines) are
    skipped silently.

    Args:
        path: Path of the dump file to read.

    Yields:
        The object decoded from each parseable line, in file order.

    Raises:
        NotImplementedError: If the file name does not end in ``.gz``,
            ``.bz2`` or ``.json``.  Because this is a generator, the error
            surfaces when iteration starts, not when the function is called.
        UnicodeDecodeError: If a line is not valid UTF-8.
    """
    file_ = os.path.split(path)[-1]
    if file_.endswith('.gz'):
        opener = gzip.open
    elif file_.endswith('.bz2'):
        opener = bz2.BZ2File
    elif file_.endswith('.json'):
        opener = open
    else:
        raise NotImplementedError(f'Reading file {file_} is not supported')

    # Read raw bytes in every case and decode explicitly as UTF-8, so a plain
    # .json file is handled exactly like the compressed variants instead of
    # depending on the platform's default text encoding.
    with opener(path, 'rb') as f:
        for line in f:
            try:
                yield json.loads(line.decode('utf-8').strip().strip(','))
            except json.JSONDecodeError:
                # Not a standalone JSON document (e.g. a bracket line).
                continue
# Scan every entity in the dump named by the first command-line argument and
# write one "<entity id>\t<precision>" line to badprecisions.txt for each
# globe-coordinate claim whose precision has a magnitude of 365 or more.
#
# The report is opened once in 'w' mode: that truncates any previous report
# up front and avoids reopening the file for every hit.
with open('badprecisions.txt', 'w') as report:
    for item in read_dump(sys.argv[1]):
        for claims in item['claims'].values():
            for claim in claims:
                mainsnak = claim['mainsnak']
                if mainsnak['datatype'] != 'globe-coordinate':
                    continue
                try:
                    precision = mainsnak['datavalue']['value']['precision']
                except (KeyError, TypeError):
                    # The snak has no usable value structure (presumably a
                    # snak without a concrete value); nothing to check.
                    continue
                if precision is not None and abs(precision) >= 365.0:
                    report.write(item['id'] + '\t' + str(precision) + '\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment