Skip to content

Instantly share code, notes, and snippets.

@jwagenaar
Created June 8, 2018 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jwagenaar/32430cb6f9f7d29c5c3cc9117a911a8a to your computer and use it in GitHub Desktop.
Save jwagenaar/32430cb6f9f7d29c5c3cc9117a911a8a to your computer and use it in GitHub Desktop.
Importing het.io into the Blackfynn platform
from blackfynn import Blackfynn
import json, math
# LOGIN INFO
# Profile name handed to the Blackfynn client and the name of the dataset
# that gets created; the profile value below is a placeholder to fill in.
use_context = '<your profile name>'
dataset_name = 'hetio'

# Load JSON dataset
# The path is relative, so the het.io export is read from the current
# working directory.
with open('hetionet-v1.0.json') as json_file:
    data = json.load(json_file)

# Log into the platform and create a dataset
bf = Blackfynn(use_context)
ds = bf.create_dataset(name=dataset_name)
# identifier, name
# Properties added to every model before the ones inferred from the data.
# NOTE(review): the tuple layout is presumably
# (property name, type, display name, is-title flag) -- confirm against the
# Blackfynn client's add_properties() documentation.
default_props = [('name', str, 'Name', True)]

# Create models
# One (initially empty) property table per het.io node kind; the tables are
# filled in afterwards, keyed by property name.
json_models = data['metanode_kinds']
model_map = {kind: {} for kind in json_models}
# map records to blackfynn layout
#
# Walk every node and record, per node kind, which properties occur and which
# primitive type each one has.  The first node that carries a property with a
# str/int/float value decides that property's type; values of any other type
# (for example lists or null) do not create a property.
try:
    string_types = basestring  # Python 2: matches both str and unicode
except NameError:
    string_types = str  # Python 3 has no basestring

json_nodes = data['nodes']
for iNode in json_nodes:
    # The node-level identifier is handled like one more data entry.  It is
    # written last so it wins over a data entry that is itself called
    # 'identifier'.
    node_values = dict(iNode['data'])
    node_values['identifier'] = iNode['identifier']

    cur_model = model_map[iNode['kind']]
    for iKey, datapoint in node_values.items():
        if iKey in cur_model:
            # Type already fixed by an earlier node of this kind.
            continue
        if isinstance(datapoint, string_types):
            cur_model[iKey] = (iKey, str)
        elif isinstance(datapoint, int):
            # NOTE: bool is a subclass of int, so JSON true/false values are
            # registered as int here as well.
            cur_model[iKey] = (iKey, int)
        elif isinstance(datapoint, float):
            cur_model[iKey] = (iKey, float)
# create models in Blackfynn dataset
for kind in json_models:
    # Spaces in the het.io kind name are replaced by underscores to form the
    # model name.
    concept = ds.create_concept(kind.replace(" ", "_"))
    # Shared default properties first, then the ones inferred for this kind.
    concept.add_properties(default_props)
    concept.add_properties(list(model_map[kind].values()))
# Create map of objects by model type
# record_map holds the plain dicts to upload, blackfynn_models will hold the
# objects that come back from the platform; both are keyed by node kind.
record_map = {kind: [] for kind in json_models}
blackfynn_models = {kind: [] for kind in json_models}

for node in json_nodes:
    # Start from the node-level fields, then lay the node's data entries on
    # top; a data entry named 'identifier' or 'name' therefore replaces the
    # node-level value.
    record = {'identifier': node['identifier'], 'name': node['name']}
    record.update(node['data'])
    record_map[node['kind']].append(record)
# Create records in Blackfynn dataset (create 500 records per call)
batch_size = 500
for iM in record_map:
    # Model names were created with spaces replaced by underscores, so the
    # same substitution is applied when looking the model up again.
    curModel = ds.get_concept(iM.replace(" ", "_"))
    records = record_map[iM]
    # Stepping by batch_size yields every full batch followed by the shorter
    # remainder (if any); an empty record list results in no calls at all.
    for start in range(0, len(records), batch_size):
        batch = records[start:start + batch_size]
        if len(batch) == batch_size:
            print('creating ' + str(start) + ' : ' + str(start + batch_size - 1))
        else:
            # Separator after "records" added: the number used to be glued
            # straight onto the word.
            print('creating last number of records ' + str(start) + ' : ' + str(len(records) - 1))
        # Keep the objects returned by the platform; they are needed later
        # to link records to each other.
        blackfynn_models[iM].extend(curModel.create_many(*batch))
# Create lookup for blackfynn models:
# lookup[kind][identifier] -> the object stored in blackfynn_models for that
# record, so that edges can later be resolved from (kind, identifier) pairs.
lookup = {kind: {} for kind in json_models}
for iM in blackfynn_models:
    # print() call form, matching the rest of the script (a single
    # parenthesised argument prints identically on Python 2).
    print(iM)
    for iR in blackfynn_models[iM]:
        print(iR)
        lookup[iM][iR.get('identifier')] = iR
# Group het.io edges by kind and map their endpoints to blackfynn objects
#
# link_edges: edge kind -> relationship object returned by
#             create_relationship() (created the first time a kind is seen)
# link_map:   edge kind -> list of link descriptions for that kind
link_edges = {}
link_map = {}
all_edges = data['edges']
for idx, edge in enumerate(all_edges):
    # source_id / target_id are indexed as [node kind, node identifier].
    source_id = edge['source_id']
    print(str(idx) + ' ' + source_id[0] + ' : ' + str(source_id[1]))
    source = lookup[source_id[0]][source_id[1]]
    target_id = edge['target_id']
    target = lookup[target_id[0]][target_id[1]]
    kind = edge['kind']
    # Membership is tested on the dict itself; no key list is built per edge.
    if kind not in link_map:
        link_edges[kind] = ds.create_relationship(kind, 'het.io edge')
        link_map[kind] = []
    link_map[kind].append({
        'source': source,
        'destination': target,
        'relationship_type': kind,
        'values': {},
    })
# Create Links (create 500 links per call)
# all_links collects, per edge kind, whatever create_many() returns.
all_links = {kind: [] for kind in link_map}
chunk = 500
for kind in link_map:
    relationship = ds.get_relationship(kind)
    pending = link_map[kind]
    total = len(pending)
    # Index just past the last complete batch of `chunk` links.
    full_end = (total // chunk) * chunk
    for start in range(0, full_end, chunk):
        print('creating ' + kind + ' - ' + str(start) + ' : ' + str(start + chunk - 1))
        all_links[kind].extend(relationship.create_many(*pending[start:start + chunk]))
    # Whatever is left over forms one final, shorter batch.
    if total > full_end:
        print('creating final batch of records: ' + str(full_end) + ' : ' + str(total - 1))
        all_links[kind].extend(relationship.create_many(*pending[full_end:total]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment