-
-
Save jwagenaar/32430cb6f9f7d29c5c3cc9117a911a8a to your computer and use it in GitHub Desktop.
Importing het.io into the Blackfynn platform
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from blackfynn import Blackfynn | |
import json, math | |
# LOGIN INFO
# `use_context` is handed to the Blackfynn client constructor below and
# `dataset_name` is the name given to the newly created dataset.
use_context = '<your profile name>'
dataset_name = 'hetio'

# Load JSON dataset
# The whole hetionet export is parsed into memory as `data`.
with open('hetionet-v1.0.json') as json_file:
    data = json.load(json_file)

# Log into the platform and create a dataset
bf = Blackfynn(use_context)
ds = bf.create_dataset(name=dataset_name)
# Properties added to every model before the per-kind properties. Only
# 'name' is listed here; 'identifier' is picked up later from the nodes.
# NOTE(review): the tuple layout looks like (name, type, display name,
# title flag) -- confirm against the Blackfynn client.
default_props = [('name', str, 'Name', True)]

# Create models
# One (initially empty) property map per node kind listed in the export.
json_models = data['metanode_kinds']
model_map = {kind: {} for kind in json_models}
# map records to blackfynn layout
# Walk every node and, per node kind, remember a (key, type) pair for each
# data key plus the node's own 'identifier'. The type of the first value
# seen for a key wins; later nodes never overwrite it.
json_nodes = data['nodes']

# `basestring` only exists on Python 2 (where JSON text is `unicode`);
# fall back to `str` so the same check works on Python 3.
try:
    string_types = basestring  # noqa: F821 -- Python 2 only
except NameError:
    string_types = str

for iNode in json_nodes:
    iData = iNode['data']
    # On Python 3 dict.keys() is a view without .append(), so build a real
    # list before adding the synthetic 'identifier' key.
    iDataKeys = list(iData) + ['identifier']
    cur_model = model_map[iNode['kind']]
    for iKey in iDataKeys:
        # 'identifier' lives on the node itself, everything else in 'data'.
        if iKey == 'identifier':
            datapoint = iNode['identifier']
        else:
            datapoint = iData[iKey]

        if iKey in cur_model:
            continue

        # Values of any other type (list, None, ...) get no property.
        # Note that bool values match the int branch (bool subclasses int).
        if isinstance(datapoint, string_types):
            cur_model[iKey] = (iKey, str)
        elif isinstance(datapoint, int):
            cur_model[iKey] = (iKey, int)
        elif isinstance(datapoint, float):
            cur_model[iKey] = (iKey, float)
# create models in Blackfynn dataset
for iM in json_models:
    # Spaces in the kind name are replaced by underscores for the model name.
    model = ds.create_concept(iM.replace(" ", "_"))
    # Shared properties first, then the ones collected for this kind.
    model.add_properties(default_props)
    model.add_properties(list(model_map[iM].values()))
# Create map of objects by model type
# record_map holds the plain dicts to upload per kind; blackfynn_models
# will receive whatever the upload calls return for that kind.
record_map = {kind: [] for kind in json_models}
blackfynn_models = {kind: [] for kind in json_models}

for iNode in json_nodes:
    # Start from the node-level fields, then layer the 'data' entries on top
    # (a 'data' key with the same name overrides the node-level value).
    curItem = {'identifier': iNode['identifier'], 'name': iNode['name']}
    curItem.update(iNode['data'])
    record_map[iNode['kind']].append(curItem)
# Create records in Blackfynn dataset (create 500 records per call)
# For each kind, upload its record dicts in slices of `callSz` and collect
# the objects returned by create_many() in blackfynn_models[kind].
callSz = 500
for iM in record_map:
    curModel = ds.get_concept(iM.replace(" ", "_"))
    records = record_map[iM]
    # Stepping by callSz makes the last slice simply shorter, so the
    # remainder needs no separate branch and no division is involved
    # (the old `/` + floor relied on Python 2 integer division).
    for start in range(0, len(records), callSz):
        batch = records[start:start + callSz]
        # Report the inclusive index range actually sent in this call.
        print('creating ' + str(start) + ' : ' + str(start + len(batch) - 1))
        blackfynn_models[iM].extend(curModel.create_many(*batch))
# Create lookup for blackfynn models:
# lookup[kind][identifier] -> the created Blackfynn record, so edges can be
# resolved to their source and target records.
lookup = {}
for iM in json_models:
    lookup[iM] = {}

for iM in blackfynn_models:
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former `print x` statement form is a syntax error on Python 3.
    print(iM)
    for iR in blackfynn_models[iM]:
        print(iR)
        lookup[iM][iR.get('identifier')] = iR
# Bulk create records and map blackfynn object to hetio objects
# link_edges maps an edge kind to the relationship created for it;
# link_map collects, per edge kind, the link descriptions to upload.
link_edges = {}
link_map = {}
all_edges = data['edges']
idx = 0
for edge in all_edges:
    # 'source_id' / 'target_id' are indexed as [kind, identifier].
    source_id = edge['source_id']
    print(str(idx) + ' ' + source_id[0] + ' : ' + str(source_id[1]))
    source = lookup[source_id[0]][source_id[1]]
    target_id = edge['target_id']
    target = lookup[target_id[0]][target_id[1]]

    edge_kind = edge['kind']
    # First time this kind is seen: create its relationship in the dataset.
    if edge_kind not in link_map:
        link_edges[edge_kind] = ds.create_relationship(edge_kind, 'het.io edge')
        link_map[edge_kind] = []

    link_map[edge_kind].append({
        'source': source,
        'destination': target,
        'relationship_type': edge_kind,
        'values': {},
    })
    idx += 1
# Create Links (create 500 links per call)
# For each relationship kind, upload its link descriptions in slices of
# `callSz` and collect what create_many() returns in all_links[kind].
all_links = {kind: [] for kind in link_map}

callSz = 500
for iM in link_map:
    curLink = ds.get_relationship(iM)
    links = link_map[iM]
    # Stepping by callSz makes the last slice simply shorter, so the
    # remainder needs no separate branch and no division is involved
    # (the old `/` + floor relied on Python 2 integer division).
    for start in range(0, len(links), callSz):
        batch = links[start:start + callSz]
        # Every batch, including the last, reports its relationship name
        # and the inclusive index range sent in this call.
        print('creating ' + iM + ' - ' + str(start) + ' : ' + str(start + len(batch) - 1))
        all_links[iM].extend(curLink.create_many(*batch))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment