# mnowotka/mol2tar.py

## mol2tar.py
# This script reads a list of compounds, identified by their ChEMBL IDs, from a CSV file
# given as an input. For each compound it finds related targets, optionally filtered by
# organism. It saves a mapping between the compound and its targets in the output CSV file.

# import libraries for reading csv files, parsing command line arguments
# and the python client library
import argparse
import csv
import sys

from chembl_webresource_client.new_client import new_client

# Some constants/defaults
DEFAULT_INPUT = "compounds_list_lite.csv"
DEFAULT_OUTPUT = "./python_out.csv"


def parse_args(argv=None):
    """Parse the command line options of the script.

    argv -- list of argument strings; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns the argparse namespace with the attributes ``input``,
    ``output`` and ``organism``.
    """
    parser = argparse.ArgumentParser(description='Map compounds into tergets')
    parser.add_argument('--input', '-i', default=DEFAULT_INPUT, dest="input", help="Path to input CSV file with compound chembl ids [default]")
    parser.add_argument('--output', '-o', default=DEFAULT_OUTPUT, dest="output", help="Path to output CSV file with targets [default]")
    parser.add_argument('--organism', '-O', default=None, dest="organism", help="Filter targets by organisms [default]")
    return parser.parse_args(argv)


def _open_csv(path, mode):
    """Open *path* the way the csv module expects on the running Python.

    mode -- 'r' or 'w'.
    """
    if sys.version_info[0] >= 3:
        # Python 3: csv works on text files and needs newline='' so that it
        # can handle line endings (and embedded newlines) itself.
        return open(path, mode, newline='')
    # Python 2: csv works on byte strings, so the file must be binary.
    return open(path, mode + 'b')


def read_compound_ids(path):
    """Return the first column of every non-empty row of the CSV file *path*.

    Blank lines are skipped: csv.reader yields an empty list for them and
    indexing it would raise IndexError.
    """
    with _open_csv(path, 'r') as csvfile:
        return [row[0] for row in csv.reader(csvfile) if row]


def fetch_target_ids(mol, organism=None):
    """Return the sorted, de-duplicated target ChEMBL IDs found for *mol*.

    mol      -- molecule ChEMBL ID used to filter activities.
    organism -- optional value for the ``target_organism`` activity filter;
                a falsy value disables the filter.
    """
    # we jump from compounds to targets through activities:
    activities = new_client.activity.filter(molecule_chembl_id=mol)
    # optionally applying organism filter
    if organism:
        activities = activities.filter(target_organism=organism)
    # a set drops the duplicates coming from several activities per target
    return sorted({act['target_chembl_id'] for act in activities})


def write_rows(path, rows):
    """Write *rows* (an iterable of lists) to the CSV file *path*."""
    with _open_csv(path, 'w') as csvfile:
        csv.writer(csvfile).writerows(rows)


def main(argv=None):
    """Read compound IDs, look up their targets and save the mapping.

    Each output row is the compound ID followed by its target IDs.
    """
    args = parse_args(argv)
    rows = []
    for mol in read_compound_ids(args.input):
        # printing here just to see the progress
        print(mol)
        rows.append([mol] + fetch_target_ids(mol, args.organism))
    # Finally write it to the output csv file
    write_rows(args.output, rows)


if __name__ == "__main__":
    main()