Created
April 29, 2013 12:01
-
-
Save zero323/5481189 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import os | |
import json | |
import urllib2 | |
# Remote location of the combined dataset; fetched only when no local copy
# exists yet.
DATA_URL = (
    'https://dl.dropboxusercontent.com/s/blq0xxvx5asfnkl/combined.json'
    '?token_hash=AAGZxGLemmo09Ib-_P6kyjnzEoTzTMcIVjJ5b95zdb0Eqw&dl=1'
)
# Local file (relative to the working directory) that caches the dataset.
DATA_FILE = 'combined_sample.json'
class ChemblStatsClient(object):
    """Load the combined dataset and report simple statistics about it.

    The parsed JSON document is kept in ``self.data``. The methods below read
    its ``targets``, ``train_compounds``, ``test_compounds`` and
    ``bioactivities`` entries.
    """

    def __init__(self):
        """Load the dataset, downloading and caching it on first use.

        When ``DATA_FILE`` does not exist, the document is fetched from
        ``DATA_URL``, parsed, and written to ``DATA_FILE`` (indented JSON).
        Otherwise the cached file is read and parsed.
        """
        if not os.path.exists(DATA_FILE):
            response = urllib2.urlopen(DATA_URL)
            try:
                payload = response.read()
            finally:
                # Release the connection even if reading fails.
                response.close()
            # Parse before touching the disk so a malformed download never
            # leaves a cache file behind.
            self.data = json.loads(payload)
            with open(DATA_FILE, 'w') as fw:
                fw.write(json.dumps(self.data, indent=2))
        else:
            with open(DATA_FILE) as fr:
                self.data = json.loads(fr.read())

    def print_basic_stats(self):
        """Print how many targets, train/test compounds and bioactivities exist."""
        message = (
            '\n'
            'Targets: {0}\n'
            'Train compounds subset: {1}\n'
            'Test compounds subset: {2}\n'
            'Bioactivities: {3}\n'
        )
        print(message.format(
            len(self.data[u'targets']),
            len(self.data[u'train_compounds']),
            len(self.data[u'test_compounds']),
            len(self.data[u'bioactivities'])
        ))

    def print_prediction_stats(self):
        """Print how many known targets of the test compounds were predicted.

        For every test compound, the size of the overlap between its
        ``predicted_targets`` and ``known_targets`` is added up, together with
        the number of known targets. The ratio of the two sums is printed; when
        there are no known targets at all the ratio is reported as ``n/a``
        instead of dividing by zero.
        """
        known_total = 0
        hit_total = 0
        for compound in self.data[u'test_compounds']:
            known = compound['known_targets']
            known_total += len(known)
            hit_total += len(
                set(compound['predicted_targets']).intersection(known))

        if known_total:
            ratio = float(hit_total) / known_total
        else:
            ratio = 'n/a'

        message = (
            '\n'
            'Accurately predicted: {0} / Known targets: {1} | Ratio: {2}\n'
        )
        print(message.format(hit_total, known_total, ratio))

    def get_known_vs_prediction(self, print_=True):
        """Return, and optionally print, known vs. predicted targets.

        Args:
            print_: when true, print one tab-separated line per test compound.

        Returns:
            A list of ``(chemblId, known_targets, predicted_targets)`` tuples,
            one per test compound, in dataset order.
        """
        rows = [
            (compound['chemblId'],
             compound['known_targets'],
             compound['predicted_targets'])
            for compound in self.data[u'test_compounds']
        ]
        if print_:
            for row in rows:
                print('{0}\t\t\t{1}\t\t{2}'.format(*row))
        return rows

    def get_bioactivity(self, compound_chemblid, target_chemblid):
        """Return the list of bioactivities matching both given ids."""
        return [
            activity for activity in self.data[u'bioactivities']
            if activity['ingredient_cmpd_chemblid'] == compound_chemblid
            and activity['target_chemblid'] == target_chemblid
        ]

    def get_compound(self, compound_chemblid):
        """Return the list of compounds with the given id.

        Matches from the test subset come first, followed by matches from the
        train subset.
        """
        return [
            compound
            for subset in (u'test_compounds', u'train_compounds')
            for compound in self.data[subset]
            if compound['chemblId'] == compound_chemblid
        ]

    def get_target(self, target_chemblid):
        """Return the list of targets with the given id."""
        return [
            target for target in self.data[u'targets']
            if target['chemblId'] == target_chemblid
        ]
def main():
    """Build the stats client and run each of its reports in order."""
    stats_client = ChemblStatsClient()
    reports = (
        stats_client.print_basic_stats,
        stats_client.print_prediction_stats,
        stats_client.get_known_vs_prediction,
    )
    for run_report in reports:
        run_report()


if __name__ == '__main__':
    main()
Author
zero323
commented
Apr 29, 2013
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment