Skip to content

Instantly share code, notes, and snippets.

@mnowotka
Created May 30, 2017 16:11
Show Gist options
  • Save mnowotka/6d3066772b7f170213a303ead75a7f3d to your computer and use it in GitHub Desktop.
Save mnowotka/6d3066772b7f170213a303ead75a7f3d to your computer and use it in GitHub Desktop.
import csv
from chembl_webresource_client.new_client import new_client
# Map every compound ChEMBL ID listed in 'compounds_list.csv' (first column)
# to the UniProt accessions of the targets it has activities against, and
# write the mapping to 'compounds_2_targets.csv' as one row per compound:
# the compound ID followed by its accessions.
#
# NOTE: this targets Python 3 (text-mode csv files, dict views are not
# subscriptable).

# This will be our resulting structure mapping compound ChEMBL IDs into target uniprot IDs
compounds2targets = dict()

# First, let's parse the csv file to extract compound ChEMBL IDs.
# On Python 3 the csv module expects a text-mode file opened with newline=''.
with open('compounds_list.csv', newline='') as csvfile:
    for row in csv.reader(csvfile):
        # Blank lines come back as empty rows; skip them instead of
        # crashing on row[0].
        if not row:
            continue
        compound_id = row[0].strip()
        if compound_id:
            compounds2targets[compound_id] = set()

# OK, we have our source IDs, let's process them in chunks so that a single
# request never carries more than chunk_size IDs:
chunk_size = 50
# Materialise the keys: a dict view cannot be sliced.
keys = list(compounds2targets)

for i in range(0, len(keys), chunk_size):
    # we jump from compounds to targets through activities:
    activities = new_client.activity.filter(molecule_chembl_id__in=keys[i:i + chunk_size])
    # extracting target ChEMBL IDs from activities:
    for act in activities:
        compounds2targets[act['molecule_chembl_id']].add(act['target_chembl_id'])

# OK, now our dictionary maps from compound ChEMBL IDs into target ChEMBL IDs.
# We would like to replace target ChEMBL IDs with uniprot IDs.
#
# Many compounds share targets, so resolve every distinct target exactly once
# (again in chunks) instead of re-querying it for each compound.
all_target_ids = list(set().union(*compounds2targets.values()))
target2uniprots = {}
for i in range(0, len(all_target_ids), chunk_size):
    targets = new_client.target.filter(target_chembl_id__in=all_target_ids[i:i + chunk_size])
    for t in targets:
        accessions = target2uniprots.setdefault(t['target_chembl_id'], set())
        # Components without an accession are dropped so they neither produce
        # empty cells nor break the sort when writing the output.
        accessions.update(
            comp['accession'] for comp in t['target_components'] if comp['accession']
        )

# Re-assigning the value of an existing key while iterating is safe: the
# dictionary does not change size.
for key, val in compounds2targets.items():
    uniprots = set()
    for target_id in val:
        # A target the service returned nothing for contributes no accessions.
        uniprots |= target2uniprots.get(target_id, set())
    compounds2targets[key] = uniprots

# Finally write it to the output csv file (text mode + newline='' on Python 3).
with open('compounds_2_targets.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    for key, val in compounds2targets.items():
        # Sort the accessions so repeated runs produce identical files.
        writer.writerow([key] + sorted(val))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment