mnowotka/mol2tar.sh

## mol2tar.sh
#!/bin/bash

# This script will get a list of targets related to a compound.
# Each compound, identified by its chembl id will be read from
# the CSV file specified as the first argument.

# This script assumes that you have a 'jq' command line tool installed.
# Installation instructions can be found here:
# https://stedolan.github.io/jq/download/

# Script settings.
#   URL_ROOT        - root URL of the ChEMBL API.
#   RESTY_LOCATION  - address the "resty" helper script is downloaded from.
#   DEFAULT_INPUT   - CSV file with the compounds; taken from the first
#                     positional argument when one is passed.
#   DEFAULT_OUTPUT  - CSV file the results are appended to; taken from the
#                     second positional argument when one is passed.
# The ${N-default} form falls back to the default only when the argument
# is not passed at all (an explicitly empty argument is kept).
URL_ROOT='https://www.ebi.ac.uk/chembl/api/data'
RESTY_LOCATION='https://raw.githubusercontent.com/micha/resty/master/resty'
DEFAULT_INPUT="${1-./compounds_list_lite.csv}"
DEFAULT_OUTPUT="${2-./bash_out.csv}"

# Download "resty", a tiny but useful wrapper for curl that makes our task
# easier. The script is saved as ./resty.
#   -f  fail on HTTP errors instead of saving the server's error page
#   -L  follow redirects
# NOTE(review): the next step sources code fetched from the network on every
# run; consider pinning a specific resty commit or shipping a local copy.
if ! curl -fL "$RESTY_LOCATION" > resty; then
  echo "Failed to download resty from $RESTY_LOCATION" >&2
  exit 1
fi

# A dot (.) means "sourcing" the "resty" script we've just downloaded, i.e.
# loading it into this shell so we can use it in the following lines.
# The explicit "./" matters: for a name without a slash bash searches PATH
# before it looks in the current directory.
. ./resty || exit 1

# Once "resty" is sourced it needs to be configured. The only configuration
# is specifying the API root URL.
resty "$URL_ROOT"

# Once "resty" is loaded and configured we loop through the CSV file line by
# line. For every compound the target ids are printed to the terminal and one
# row "<chembl_id>,<target>,<target>,...," is appended to $DEFAULT_OUTPUT.
if [[ ! -r "$DEFAULT_INPUT" ]]; then
  echo "Cannot read input file: $DEFAULT_INPUT" >&2
  exit 1
fi

# `IFS= read -r` keeps the line intact (no backslash processing); the
# `|| [[ -n "$line" ]]` part makes sure a last line that has no trailing
# newline is processed as well.
while IFS= read -r line || [[ -n "$line" ]]; do
  # Drop a trailing carriage return so files with Windows line endings work.
  line=${line%$'\r'}

  # The first column (everything before the first comma) is the chembl id.
  # `read` with the shell's default IFS trims surrounding whitespace.
  read -r chembl_id <<< "${line%%,*}"

  # Skip blank lines: an empty id would send a query without a compound id.
  [[ -n "$chembl_id" ]] || continue

  # Print the id to see that reading from the CSV works...
  echo "Targets for: $chembl_id"

  # A single API call per chembl_id retrieves the compound bioactivities and
  # 'jq' extracts the target id of each of them. stdin is detached so nothing
  # in this pipeline can swallow the remaining CSV lines.
  activity_targets=$(GET "/activity.json" -q "molecule_chembl_id=$chembl_id" < /dev/null \
    | jq '.activities[].target_chembl_id')

  targets=""
  if [[ -n "$activity_targets" ]]; then
    # Show the ids exactly as jq printed them (quoted, possibly repeated).
    printf '%s\n' "$activity_targets"

    # Strip the quotes, sort, make the ids unique and join them with commas.
    # The trailing comma is kept so the output format stays unchanged.
    targets=$(printf '%s\n' "$activity_targets" | tr -d '"' | sort -u | tr '\n' ',')
  fi

  echo "$chembl_id,$targets" >> "$DEFAULT_OUTPUT"
done < "$DEFAULT_INPUT"
	#!/bin/bash

	# This script will get a list of targets related to a compound.
	# Each compound, identified by its chembl id will be read from
	# the CSV file specified as the first argument.

	# This script assumes that you have a 'jq' command line tool installed.
	# Installation instructions can be found here:
	# https://stedolan.github.io/jq/download/

	# Script settings.
	#   URL_ROOT        - root URL of the ChEMBL API.
	#   RESTY_LOCATION  - address the "resty" helper script is downloaded from.
	#   DEFAULT_INPUT   - CSV file with the compounds; taken from the first
	#                     positional argument when one is passed.
	#   DEFAULT_OUTPUT  - CSV file the results are appended to; taken from the
	#                     second positional argument when one is passed.
	# The ${N-default} form falls back to the default only when the argument
	# is not passed at all (an explicitly empty argument is kept).
	URL_ROOT='https://www.ebi.ac.uk/chembl/api/data'
	RESTY_LOCATION='https://raw.githubusercontent.com/micha/resty/master/resty'
	DEFAULT_INPUT="${1-./compounds_list_lite.csv}"
	DEFAULT_OUTPUT="${2-./bash_out.csv}"

	# Download "resty", a tiny but useful wrapper for curl that makes our task
	# easier. The script is saved as ./resty.
	#   -f  fail on HTTP errors instead of saving the server's error page
	#   -L  follow redirects
	# NOTE(review): the next step sources code fetched from the network on every
	# run; consider pinning a specific resty commit or shipping a local copy.
	if ! curl -fL "$RESTY_LOCATION" > resty; then
	  echo "Failed to download resty from $RESTY_LOCATION" >&2
	  exit 1
	fi

	# A dot (.) means "sourcing" the "resty" script we've just downloaded, i.e.
	# loading it into this shell so we can use it in the following lines.
	# The explicit "./" matters: for a name without a slash bash searches PATH
	# before it looks in the current directory.
	. ./resty || exit 1

	# Once "resty" is sourced it needs to be configured. The only configuration
	# is specifying the API root URL.
	resty "$URL_ROOT"

	# Once "resty" is loaded and configured we loop through the CSV file line by
	# line. For every compound the target ids are printed to the terminal and one
	# row "<chembl_id>,<target>,<target>,...," is appended to $DEFAULT_OUTPUT.
	# The pipes in this section were previously written as "\|", which passes a
	# literal "|" as an argument instead of building a pipeline.
	# NOTE(review): this section repeats the loop found earlier in this file;
	# running both appends every row twice - confirm the duplicate is intended.
	if [[ ! -r "$DEFAULT_INPUT" ]]; then
	  echo "Cannot read input file: $DEFAULT_INPUT" >&2
	  exit 1
	fi

	# `IFS= read -r` keeps the line intact (no backslash processing); the
	# `|| [[ -n "$line" ]]` part makes sure a last line that has no trailing
	# newline is processed as well.
	while IFS= read -r line || [[ -n "$line" ]]; do
	  # Drop a trailing carriage return so files with Windows line endings work.
	  line=${line%$'\r'}

	  # The first column (everything before the first comma) is the chembl id.
	  # `read` with the shell's default IFS trims surrounding whitespace.
	  read -r chembl_id <<< "${line%%,*}"

	  # Skip blank lines: an empty id would send a query without a compound id.
	  [[ -n "$chembl_id" ]] || continue

	  # Print the id to see that reading from the CSV works...
	  echo "Targets for: $chembl_id"

	  # A single API call per chembl_id retrieves the compound bioactivities and
	  # 'jq' extracts the target id of each of them. stdin is detached so nothing
	  # in this pipeline can swallow the remaining CSV lines.
	  activity_targets=$(GET "/activity.json" -q "molecule_chembl_id=$chembl_id" < /dev/null \
	    | jq '.activities[].target_chembl_id')

	  targets=""
	  if [[ -n "$activity_targets" ]]; then
	    # Show the ids exactly as jq printed them (quoted, possibly repeated).
	    printf '%s\n' "$activity_targets"

	    # Strip the quotes, sort, make the ids unique and join them with commas.
	    # The trailing comma is kept so the output format stays unchanged.
	    targets=$(printf '%s\n' "$activity_targets" | tr -d '"' | sort -u | tr '\n' ',')
	  fi

	  echo "$chembl_id,$targets" >> "$DEFAULT_OUTPUT"
	done < "$DEFAULT_INPUT"