-
-
Save mnowotka/f634c0b8d88812af332216e34770782b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script reads a list of compounds, identified by their ChEMBL IDs, from a CSV file
# given as an input. For each compound it finds related targets, optionally filtered by
# organism. It saves a mapping between the compound and its targets in the output CSV file.
# import libraries for reading csv files, parsing command line arguments | |
# and the python client library | |
import csv | |
import argparse | |
from chembl_webresource_client.new_client import new_client | |
# Default file locations, used when the matching command line option is omitted.
DEFAULT_INPUT = "compounds_list_lite.csv"
DEFAULT_OUTPUT = "./python_out.csv"

# Command line interface definition. All three options are optional:
#   --input / -i     path of the CSV file holding the compound ChEMBL IDs
#   --output / -o    path of the CSV file the compound -> targets mapping goes to
#   --organism / -O  when given, only targets of that organism are kept
parser = argparse.ArgumentParser(description='Map compounds into tergets')
parser.add_argument(
    '--input', '-i',
    default=DEFAULT_INPUT,
    dest="input",
    help="Path to input CSV file with compound chembl ids [default]",
)
parser.add_argument(
    '--output', '-o',
    default=DEFAULT_OUTPUT,
    dest="output",
    help="Path to output CSV file with targets [default]",
)
parser.add_argument(
    '--organism', '-O',
    default=None,
    dest="organism",
    help="Filter targets by organisms [default]",
)
args = parser.parse_args()

# First, parse the input CSV file to extract the compound ChEMBL IDs.
# The file is opened in text mode with newline='' because that is what the
# csv module expects on Python 3 (binary mode only worked on Python 2).
with open(args.input, newline='') as csvfile:
    # The ID is read from the first column; blank lines come back from
    # csv.reader as empty rows and are skipped instead of raising IndexError.
    mols = [row[0] for row in csv.reader(csvfile) if row]

# Iterate over the IDs to fetch related activities and extract target IDs.
# Each entry of `rows` is: [compound id, target id, target id, ...]
rows = []
for mol in mols:
    # printing here just to see the progress
    print(mol)
    # we jump from compounds to targets through activities:
    activities = new_client.activity.filter(molecule_chembl_id=mol)
    # optionally applying organism filter
    if args.organism:
        activities = activities.filter(target_organism=args.organism)
    # a set collapses the many activities that point at the same target
    targets = {act['target_chembl_id'] for act in activities}
    # sorted so the output columns are deterministic between runs
    rows.append([mol] + sorted(targets))

# Finally write the mapping to the output CSV file, one compound per line
with open(args.output, 'w', newline='') as csvfile:
    csv.writer(csvfile).writerows(rows)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment