Last active
July 27, 2021 01:43
-
-
Save bmamlin/458eb611e1ad99f9be4f12858064cc28 to your computer and use it in GitHub Desktop.
Diff OCL import with export
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import ndjson | |
import logging | |
import urllib.parse | |
# Input and output locations for the diff.
# LOCAL_FILE is parsed as newline-delimited JSON (one record per line),
# OCL_FILE as a single JSON document, and DIFF_FILE receives the report.
LOCAL_FILE = 'path/to/import.json'
OCL_FILE = 'path/to/export.json'
DIFF_FILE = 'path/to/diff.json'

logger = logging.getLogger('diff')
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

# JSON text is UTF-8; pin the encoding so decoding does not depend on the
# platform/locale default picked by open().
with open(LOCAL_FILE, 'r', encoding='utf-8') as f:
    local_data = ndjson.load(f)
with open(OCL_FILE, 'r', encoding='utf-8') as f:
    ocl_data = json.load(f)
def type_equals(t):
    """Return a predicate that is true for records whose 'type' equals t.

    The predicate indexes ``x['type']`` directly, so a record without a
    'type' key raises KeyError.
    """
    def has_type(x):
        return x['type'] == t

    return has_type
def q(s):
    """Return the string form of s, URL-quoted with quote_plus."""
    text = str(s)
    return urllib.parse.quote_plus(text)
def get_mapping_key(mapping):
    """Build the string key used to match a mapping between the two files.

    The key has the form ``<from_concept_url>--<map_type>--<target>``.
    The target is the mapping's 'to_concept_url' when that key is present
    and truthy; otherwise it is assembled from 'to_source_url' and the
    URL-quoted 'to_concept_code'.
    """
    target = mapping.get('to_concept_url')
    if target:
        return '%s--%s--%s' % (
            mapping['from_concept_url'], mapping['map_type'], target)

    # No usable target URL: derive one from the target source and code.
    origin = mapping['from_concept_url']
    relation = mapping['map_type']
    derived_target = '%sconcepts/%s/' % (
        mapping['to_source_url'], q(mapping['to_concept_code']))
    return '%s--%s--%s' % (origin, relation, derived_target)
def compare_concepts(a, b):
    """Log a warning for every compared property that differs between a and b.

    The properties checked are 'external_id', 'concept_class', 'datatype'
    and 'retired'. Both concepts are indexed directly, so a missing
    property raises KeyError. Nothing is returned.
    """
    for prop in ('external_id', 'concept_class', 'datatype', 'retired'):
        left, right = a[prop], b[prop]
        if left != right:
            logger.warning('Concept "%s" mismatch on %s: "%s" <> "%s"',
                           a['id'], prop, left, right)
def compare_mappings(a, b):
    """Placeholder: matched mappings are currently not compared field by field."""
# Start the report: open the output file and begin the top-level JSON object.
out = open(DIFF_FILE, 'w')
out.write('{\n')

# Index the OCL concepts by id; the first occurrence of an id wins and
# later duplicates are only reported.
ocl_concepts_cache = {}
ocl_concepts = ocl_data['concepts']
for concept in ocl_concepts:
    concept_id = concept['id']
    if concept_id not in ocl_concepts_cache:
        ocl_concepts_cache[concept_id] = concept
    else:
        # This shouldn't happen
        logger.error('OCL has more than one concept with id "%s"', concept_id)
# Emit local concepts that have no counterpart in the OCL cache.
out.write(' "missing_concepts":[')
removed_concepts = {}
first = True
for concept in filter(type_equals('Concept'), local_data):
    concept_id = concept['id']

    if concept_id in ocl_concepts_cache:
        compare_concepts(concept, ocl_concepts_cache[concept_id])
        # Remove concept from cache (if 1:1 match, cache should end up
        # empty when loop ends)
        del ocl_concepts_cache[concept_id]
        removed_concepts[concept_id] = 1
        continue

    if concept_id in removed_concepts:
        # Already matched and removed earlier, so this is a repeated local entry.
        removed_concepts[concept_id] += 1
        logger.error('Multiple copies of local concept "%s" (n=%i)\n%s',
                     concept_id, removed_concepts[concept_id],
                     json.dumps(concept))
        continue

    logger.warning('OCL does not have concept with id "%s"', concept_id)
    if not first:
        out.write(',')
    out.write('\n %s' % json.dumps(concept))
    first = False

if not first:
    out.write('\n ')
out.write('],\n')
# Any concepts remaining in cache represent extra OCL entries
out.write(' "extra_concepts": [')
first = True
# Iterate the cached concept dicts, not the dict itself: iterating the dict
# yields id strings, on which c['id'] raises TypeError.
for c in ocl_concepts_cache.values():
    logger.warning('OCL has extra concept with id "%s"', c['id'])
    if not first:
        out.write(',')
    out.write('\n %s' % json.dumps(c))
    first = False
    # logger.debug('DELETE %s/orgs/%s/concepts/%s/' % ('', c['owner'], c['id']))
if not first:
    out.write('\n ')
out.write('],\n')
# Index the non-retired OCL mappings by their mapping key; the first
# occurrence of a key wins and later duplicates are only reported.
ocl_mappings_cache = {}
ocl_mappings = ocl_data['mappings']
for mapping in ocl_mappings:
    mapping_key = get_mapping_key(mapping)
    if mapping['retired'] == True:
        continue  # skip retired mappings
    if mapping_key not in ocl_mappings_cache:
        ocl_mappings_cache[mapping_key] = mapping
    else:
        logger.error('OCL has more than one mapping for "%s"', mapping_key)
# Emit local mappings that have no counterpart in the OCL cache.
out.write(' "missing_mappings": [')
removed_mappings = {}
first = True
for mapping in filter(type_equals('Mapping'), local_data):
    mapping_key = get_mapping_key(mapping)

    if mapping_key in ocl_mappings_cache:
        compare_mappings(mapping, ocl_mappings_cache[mapping_key])
        # Remove mapping from cache (if 1:1 match, cache should end up empty)
        del ocl_mappings_cache[mapping_key]
        removed_mappings[mapping_key] = 1
        continue

    if mapping_key in removed_mappings:
        # Already matched and removed earlier, so this is a repeated local entry.
        removed_mappings[mapping_key] += 1
        logger.error('Multiple copies of local mapping "%s" (n=%i)\n%s',
                     mapping_key, removed_mappings[mapping_key],
                     json.dumps(mapping))
        continue

    logger.warning('OCL does not have mapping "%s"', mapping_key)
    if not first:
        out.write(',')
    out.write('\n %s' % json.dumps(mapping))
    first = False

if not first:
    out.write('\n ')
out.write('],\n')
# Any mappings remaining in cache represent extra OCL entries
out.write(' "extra_mappings": [')
first = True
for leftover in ocl_mappings_cache.values():
    # logger.warning('OCL has extra mapping "%s"' % m)
    if leftover['retired'] == True:
        continue
    if not first:
        out.write(',')
    out.write('\n %s' % json.dumps(leftover))
    first = False
if not first:
    out.write('\n ')
out.write(']\n')

# Close the top-level JSON object and finish the report.
out.write('}')
out.close()
print('done')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Run diff.py in a throwaway python:3-alpine container.
# The current directory is mounted at /app and used as the working directory;
# environment variables are read from ./.env. pip's stdout is discarded.
# "$PWD" is quoted so the bind mount still works when the path contains spaces.
docker run --rm -v "$PWD":/app -w /app --env-file .env python:3-alpine \
  sh -c "pip install ndjson requests > /dev/null && python diff.py"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment