Created
July 12, 2017 08:54
-
-
Save anonymous/a2bdfb301d82f6bae0a623be5e99fabf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Read a CSV file containing a list of compounds identified by their ChEMBL
# ids. For each compound, download its image and save it to the output folder.
#
# Usage: <script> [INPUT_CSV [OUTPUT_DIR]]
#   INPUT_CSV   CSV file whose first column holds the ChEMBL id
#               (default: ./compounds_list.csv)
#   OUTPUT_DIR  folder that receives one <chembl_id>.png per compound
#               (default: ./images)

# Abort on use of an unset variable instead of silently expanding to "".
set -u

# Some constants first: where to read from, where to write to, and the root
# URL of the ChEMBL API.
DEFAULT_INPUT=${1-"./compounds_list.csv"}
# The output folder comes from the SECOND argument; reading it from $1 would
# make the input file and the output folder the same path.
DEFAULT_OUTPUT=${2-"./images"}
URL_ROOT="https://www.ebi.ac.uk/chembl/api/data"

# Fail early with a clear message when the input file cannot be read.
if [[ ! -r "$DEFAULT_INPUT" ]]; then
  printf 'Error: cannot read input file: %s\n' "$DEFAULT_INPUT" >&2
  exit 1
fi

# OK, let's create the output directory
if ! mkdir -p -- "$DEFAULT_OUTPUT"; then
  printf 'Error: cannot create output folder: %s\n' "$DEFAULT_OUTPUT" >&2
  exit 1
fi

# Now we can loop through the CSV file line by line. The "|| [[ -n ... ]]"
# part keeps the last line even when the file has no trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  # For each line, extract the first column (everything before the first
  # comma) and store it in the "chembl_id" variable
  chembl_id=${line%%,*}
  # Trim surrounding whitespace, including the "\r" left by CRLF line endings
  chembl_id=${chembl_id#"${chembl_id%%[![:space:]]*}"}
  chembl_id=${chembl_id%"${chembl_id##*[![:space:]]}"}

  # Skip blank lines and lines with an empty first column
  [[ -n "$chembl_id" ]] || continue

  # An id containing "/" cannot be used as a file name inside the output
  # folder (it would point somewhere else), so refuse it
  if [[ "$chembl_id" == */* ]]; then
    printf 'Warning: skipping invalid id: %s\n' "$chembl_id" >&2
    continue
  fi

  # Just print the id here to see if reading from the CSV works...
  echo "Getting image for: $chembl_id"

  # For each chembl_id extracted from the file, make a single API call to
  # retrieve the image of this compound in PNG format. With --fail (-f) curl
  # reports an HTTP error instead of saving the server's error page, so a
  # failed download leaves an empty file for the cleanup step below.
  if ! curl -sf "${URL_ROOT}/molecule/${chembl_id}.png" \
      > "${DEFAULT_OUTPUT}/${chembl_id}.png"; then
    printf 'Warning: could not download image for: %s\n' "$chembl_id" >&2
  fi
done < "$DEFAULT_INPUT"

# Finally do a cleanup: remove empty files, the ones corresponding to
# compounds without a structure (or whose download failed). "-type f" keeps
# the cleanup restricted to regular files.
find "$DEFAULT_OUTPUT" -type f -size 0 -delete
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment