Skip to content

Instantly share code, notes, and snippets.

@mnowotka
Forked from anonymous/mol2tar.py
Created July 12, 2017 08:45
Show Gist options
  • Save mnowotka/f634c0b8d88812af332216e34770782b to your computer and use it in GitHub Desktop.
Save mnowotka/f634c0b8d88812af332216e34770782b to your computer and use it in GitHub Desktop.
# This script reads a list of compounds, identified by their ChEMBL IDs, from a CSV file
# given as an input. For each compound it finds related targets, optionally filtered by
# organism. It saves a mapping between the compound and its targets in the output CSV file.
# import libraries for reading csv files, parsing command line arguments
# and the python client library
import csv
import argparse
from chembl_webresource_client.new_client import new_client
# Defaults used when the corresponding command line option is omitted.
DEFAULT_INPUT = "compounds_list_lite.csv"
DEFAULT_OUTPUT = "./python_out.csv"

# Command line interface definition and parsing.
# "%(default)s" inside a help string is expanded by argparse to the option's
# actual default value, so `--help` shows the real file names.
parser = argparse.ArgumentParser(description='Map compounds into targets')
parser.add_argument(
    '--input', '-i', default=DEFAULT_INPUT, dest="input",
    help="Path to input CSV file with compound chembl ids "
         "[default: %(default)s]")
parser.add_argument(
    '--output', '-o', default=DEFAULT_OUTPUT, dest="output",
    help="Path to output CSV file with targets [default: %(default)s]")
# The default of None means "do not filter by organism at all".
parser.add_argument(
    '--organism', '-O', default=None, dest="organism",
    help="Filter targets by organism [default: no filtering]")
args = parser.parse_args()
# First, parse the input CSV file to extract the compounds' ChEMBL IDs,
# which are taken from the first column of every row.
# On Python 3 the csv module works on text-mode files opened with newline=''.
mols = []
with open(args.input, newline='', encoding='utf-8') as csvfile:
    for record in csv.reader(csvfile):
        # csv.reader yields an empty list for a blank line; skip those
        # instead of failing with IndexError on record[0].
        if not record:
            continue
        mols.append(record[0])

# Iterate over the IDs to fetch related activities and extract target IDs.
# Each entry of `rows` is [compound id, target id, target id, ...].
rows = []
for mol in mols:
    # Printing here just to see the progress.
    print(mol)
    # We jump from compounds to targets through activities:
    activities = new_client.activity.filter(molecule_chembl_id=mol)
    # Optionally apply the organism filter.
    if args.organism:
        activities = activities.filter(target_organism=args.organism)
    # A set drops targets that are reached through more than one activity.
    targets = {act['target_chembl_id'] for act in activities}
    # Sorting makes the column order of the output deterministic.
    rows.append([mol] + sorted(targets))

# Finally, write the mapping to the output CSV file (one compound per row).
with open(args.output, 'w', newline='', encoding='utf-8') as csvfile:
    csv.writer(csvfile).writerows(rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment