Created
May 30, 2017 21:04
-
-
Save mnowotka/a8534857e083eb07254c576ac81a5f6e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import sys

from chembl_webresource_client.new_client import new_client
# Resulting structure: maps each compound ChEMBL ID to a set of target
# identifiers. The sets start empty and are filled in by the later steps
# (first with target ChEMBL IDs, finally with target gene symbols).
compounds2targets = dict()

# Parse the csv file to extract the compound ChEMBL IDs (first column).
# The csv module wants a binary-mode file on Python 2, but a text-mode file
# opened with newline='' on Python 3, so pick the mode by interpreter version.
if sys.version_info[0] >= 3:
    csvfile = open('compounds_list.csv', 'r', newline='')
else:
    csvfile = open('compounds_list.csv', 'rb')
with csvfile:
    for row in csv.reader(csvfile):
        # csv.reader yields an empty list for a blank line; skip those and
        # rows whose first cell is blank instead of failing on row[0].
        if not row or not row[0].strip():
            continue
        compounds2targets[row[0].strip()] = set()
# We have the source IDs; process them in chunks so that every request
# carries a bounded number of compound IDs.
chunk_size = 50
# Materialise the keys into a list: on Python 3 dict.keys() returns a view,
# which cannot be sliced.
keys = list(compounds2targets.keys())
for i in range(0, len(keys), chunk_size):
    # We jump from compounds to targets through activities, keeping only
    # activities whose target organism is "Homo sapiens".
    activities = new_client.activity.filter(
        molecule_chembl_id__in=keys[i:i + chunk_size]
    ).filter(target_organism="Homo sapiens")
    # Record the target ChEMBL ID of each activity under its compound.
    for act in activities:
        compounds2targets[act['molecule_chembl_id']].add(act['target_chembl_id'])
# At this point the dictionary maps compound ChEMBL IDs to sets of target
# ChEMBL IDs. Replace every such set with the gene symbols of those targets.
for compound_id, target_ids in compounds2targets.items():
    # The number of targets per compound is not known in advance, so the
    # target endpoint is queried in slices of at most chunk_size IDs.
    ordered_ids = list(target_ids)
    gene_symbols = set()
    for start in range(0, len(target_ids), chunk_size):
        batch = ordered_ids[start:start + chunk_size]
        for target in new_client.target.filter(target_chembl_id__in=batch):
            for component in target['target_components']:
                # Keep only the synonyms flagged as gene symbols.
                gene_symbols.update(
                    synonym['component_synonym']
                    for synonym in component['target_component_synonyms']
                    if synonym['syn_type'] == "GENE_SYMBOL"
                )
    # Overwriting the value of an existing key is safe while iterating.
    compounds2targets[compound_id] = gene_symbols
# Finally write the mapping to the output csv file: one row per compound,
# holding the compound ChEMBL ID followed by the identifiers collected for it.
# The csv module wants a binary-mode file on Python 2, but a text-mode file
# opened with newline='' on Python 3, so pick the mode by interpreter version.
if sys.version_info[0] >= 3:
    csvfile = open('compounds_2_targets.csv', 'w', newline='')
else:
    csvfile = open('compounds_2_targets.csv', 'wb')
with csvfile:
    writer = csv.writer(csvfile)
    for key, val in compounds2targets.items():
        # Sets iterate in arbitrary order; sort so the columns of each row
        # are reproducible between runs.
        writer.writerow([key] + sorted(val))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment