-
-
Save mnowotka/24aca898de030efc77c3d01939656546 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# This script will get a list of targets related to a compound.
# Each compound, identified by its chembl id will be read from
# the CSV file specified as the first argument.
# This script assumes that you have a 'jq' command line tool installed.
# Installation instructions can be found here:
# https://stedolan.github.io/jq/download/
# Some constants first: we need to know the root URL of the ChEMBL API.
# We will also use a tool called "resty", which we first need to download
# from the Internet, so we need to know its address.
#
# Positional arguments (both optional):
#   $1 - path of the input CSV  (default: ./compounds_list_lite.csv)
#   $2 - path of the output CSV (default: ./bash_out.csv)
# The "${N-default}" form only falls back to the default when the argument
# is not given at all; an explicitly empty argument is kept as-is.
DEFAULT_INPUT=${1-"./compounds_list_lite.csv"}
DEFAULT_OUTPUT=${2-"./bash_out.csv"}
RESTY_LOCATION="https://raw.githubusercontent.com/micha/resty/master/resty"
URL_ROOT="https://www.ebi.ac.uk/chembl/api/data"
# Here, we download "resty", a tiny but useful wrapper for curl to make
# our task easier. It is saved as a file named "resty" in the current
# directory. The -f flag makes curl report HTTP errors as a failure instead
# of saving the server's error page, which we would otherwise source below.
# NOTE(review): this executes code fetched from the network at run time;
# consider pinning RESTY_LOCATION to a specific commit.
if ! curl -fL "$RESTY_LOCATION" > resty; then
  echo "Failed to download resty from $RESTY_LOCATION" >&2
  exit 1
fi
# A dot (.) means "sourcing" the "resty" script we've just downloaded.
# To "source" means more or less to activate the script so we can use it
# in the following lines. The explicit "./" matters: without a slash bash
# searches $PATH first and could pick up a different file named "resty".
. ./resty
# Once the "resty" tool is sourced we need to configure it. The only
# configuration is specifying the API root URL.
resty "$URL_ROOT"
# Once we have "resty" loaded and configured we can loop through the CSV file
# line by line. "read -r" keeps backslashes literal, and the
# '|| [[ -n "$line" ]]' part makes sure a final line that lacks a trailing
# newline is still processed.
while read -r line || [[ -n "$line" ]]; do
  # For each line we extract the first column (everything up to the first
  # comma) and store it in the "chembl_id" variable. When the line contains
  # no comma, read reports end-of-input but still stores the whole line.
  read -r -d , chembl_id <<<"$line"
  # Skip blank lines instead of querying the API with an empty id.
  [[ -n "$chembl_id" ]] || continue
  # Just printing the id here to see if reading from CSV works...
  echo "Targets for: $chembl_id"
  # For each chembl_id extracted from the file we make a single API call to
  # retrieve information about compound bioactivities and keep the response,
  # so it can be both displayed and post-processed without a second request.
  response=$(GET "/activity.json" -q "molecule_chembl_id=$chembl_id")
  # Show the raw (still JSON-quoted) target ids on the terminal.
  printf '%s\n' "$response" | jq '.activities[].target_chembl_id'
  # Extract the target ids using 'jq' (-r drops the JSON quotes), sort them
  # and make them unique. "tr" turns every newline into a comma, so a
  # non-empty list ends with a comma.
  targets=$(printf '%s\n' "$response" \
    | jq -r '.activities[].target_chembl_id' \
    | sort -u \
    | tr "\n" ",")
  echo "$chembl_id,$targets" >> "$DEFAULT_OUTPUT"
done < "$DEFAULT_INPUT"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment