Skip to content

Instantly share code, notes, and snippets.

@zero323
Created April 29, 2013 12:01
Show Gist options
  • Save zero323/5481189 to your computer and use it in GitHub Desktop.
Save zero323/5481189 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import os
import json
import urllib2
# Remote location of the combined JSON data set; fetched by ChemblStatsClient
# only when the local cache file (DATA_FILE) does not exist yet.
DATA_URL = (
    'https://dl.dropboxusercontent.com/s/blq0xxvx5asfnkl/combined.json'
    '?token_hash=AAGZxGLemmo09Ib-_P6kyjnzEoTzTMcIVjJ5b95zdb0Eqw'
    '&dl=1'
)

# Local cache of the downloaded data set, resolved relative to the current
# working directory.
DATA_FILE = 'combined_sample.json'
class ChemblStatsClient(object):
    """Load the combined ChEMBL sample data set and report simple statistics.

    The data set is a JSON document.  It is read from ``DATA_FILE`` when that
    file exists; otherwise it is downloaded from ``DATA_URL`` and written to
    ``DATA_FILE`` so later runs can skip the download.  The parsed document is
    stored in ``self.data`` and is accessed through the keys ``targets``,
    ``train_compounds``, ``test_compounds`` and ``bioactivities``.
    """

    def __init__(self):
        """Populate ``self.data`` from the local cache or from the network."""
        if os.path.exists(DATA_FILE):
            # Context manager so the cache file handle is always closed.
            with open(DATA_FILE) as cache_file:
                self.data = json.load(cache_file)
            return

        # Resolved locally so the download does not depend on which urllib
        # flavour the running interpreter provides.
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib2 import urlopen
        from contextlib import closing

        # closing() releases the connection even if reading fails.
        with closing(urlopen(DATA_URL)) as response:
            payload = response.read().decode('utf-8')
        self.data = json.loads(payload)

        with open(DATA_FILE, 'w') as cache_file:
            json.dump(self.data, cache_file, indent=2)

    def print_basic_stats(self):
        """Print how many targets, train/test compounds and bioactivities
        the data set contains."""
        message = (
            '\n'
            'Targets: {0}\n'
            'Train compounds subset: {1}\n'
            'Test compounds subset: {2}\n'
            'Bioactivities: {3}\n'
        )
        print(message.format(
            len(self.data[u'targets']),
            len(self.data[u'train_compounds']),
            len(self.data[u'test_compounds']),
            len(self.data[u'bioactivities']),
        ))

    def print_prediction_stats(self):
        """Print how many known targets of the test compounds were predicted.

        For every test compound the predicted targets are intersected with
        the known targets; the totals over all test compounds are printed
        together with their ratio.  When there are no known targets at all
        the ratio is reported as 0.0 instead of dividing by zero.
        """
        known_total = 0
        hit_total = 0
        for compound in self.data[u'test_compounds']:
            known = compound['known_targets']
            predicted = compound['predicted_targets']
            known_total += len(known)
            hit_total += len(set(predicted).intersection(known))

        ratio = float(hit_total) / float(known_total) if known_total else 0.0
        message = (
            '\n'
            'Accurately predicted: {0} / Known targets: {1} | Ratio: {2}\n'
        )
        print(message.format(hit_total, known_total, ratio))

    def get_known_vs_prediction(self, print_=True):
        """Return one ``(chemblId, known_targets, predicted_targets)`` tuple
        per test compound.

        print_ -- when true (the default) every tuple is also printed as a
                  tab-separated line.
        """
        rows = [
            (c['chemblId'], c['known_targets'], c['predicted_targets'])
            for c in self.data[u'test_compounds']
        ]
        if print_:
            for row in rows:
                print('{0}\t\t\t{1}\t\t{2}'.format(*row))
        return rows

    def get_bioactivity(self, compound_chemblid, target_chemblid):
        """Return the list of bioactivity records that match both the given
        compound id and the given target id."""
        return [
            record for record in self.data[u'bioactivities']
            if record['ingredient_cmpd_chemblid'] == compound_chemblid
            and record['target_chemblid'] == target_chemblid
        ]

    def get_compound(self, compound_chemblid):
        """Return the list of compounds with the given id: matches from the
        test subset first, then matches from the train subset."""
        # Real lists (not lazy filter objects) so the two halves can be
        # concatenated regardless of interpreter version.
        return [
            compound
            for subset in (u'test_compounds', u'train_compounds')
            for compound in self.data[subset]
            if compound['chemblId'] == compound_chemblid
        ]

    def get_target(self, target_chemblid):
        """Return the list of targets whose ``chemblId`` equals the given id."""
        return [
            target for target in self.data[u'targets']
            if target['chemblId'] == target_chemblid
        ]
def main():
    """Build a ChemblStatsClient and run each of its reports in order:
    basic counts, prediction accuracy, then the known-vs-predicted listing."""
    stats_client = ChemblStatsClient()
    reports = (
        stats_client.print_basic_stats,
        stats_client.print_prediction_stats,
        stats_client.get_known_vs_prediction,
    )
    for run_report in reports:
        run_report()
# Run the reports only when this file is executed as a script, so importing
# the module (e.g. to use ChemblStatsClient) has no side effects.
if __name__ == "__main__":
    main()
@zero323
Copy link
Author

zero323 commented Apr 29, 2013

# Usage example: load the client, print the summary reports, then look up one
# compound, one target and the bioactivity records that link the two.
from get_learning_stats import ChemblStatsClient

# Identifiers used for the lookups below.
compound_chemblid = 'CHEMBL234691'
target_chemblid = u'CHEMBL3979'

client = ChemblStatsClient()
client.print_basic_stats()
client.print_prediction_stats()

# Each lookup returns its matches; in an interactive session the result is
# echoed, in a script it would have to be printed or stored.
client.get_compound(compound_chemblid)
client.get_target(target_chemblid)
client.get_bioactivity(compound_chemblid, target_chemblid)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment