Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avrilcoghlan/2a2fb40973a3138a33a1f52a5f12a6f7 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/2a2fb40973a3138a33a1f52a5f12a6f7 to your computer and use it in GitHub Desktop.
Script to retrieve the PDB ligand IDs for a list of input ChEMBL compounds
#!/usr/bin/env python
# Script to look up the PDB three-letter ligand ID for a ChEMBL ID, using UniChem
import argparse
import sys
import requests # this is used to access json files
# True when running under Python 3 or later.  The numeric major version is
# compared instead of the version string, because string comparison is
# lexicographic (a major version of "10" would sort before "3").
PY3 = sys.version_info[0] >= 3
if PY3:
    # Python 3 moved urllib2's functionality into urllib.request; alias it so
    # the same name is available on both interpreters.
    import urllib.request as urllib2
else:
    import urllib2
SERVER_URL = "https://www.ebi.ac.uk/unichem/rest"
UNICHEM = "/src_compound_id"
#====================================================================#
def get_request(url, arg, pretty=False, timeout=30):
    """Return the PDB ligand id that UniChem reports for a ChEMBL compound id.

    Args:
        url: Endpoint path below SERVER_URL, e.g. "/src_compound_id".
            Leading/trailing slashes are optional.
        arg: The ChEMBL compound id to look up, e.g. "CHEMBL14249".
        pretty: Unused; kept so existing callers that pass it keep working.
        timeout: Seconds to wait for the server before requests raises a
            timeout error, instead of blocking forever.

    Returns:
        The 'src_compound_id' value of the first mapping returned by the
        server (e.g. "ATP"), or "NA" when the server returns no mapping, an
        error payload, or a body that is not the expected JSON list.
    """
    # Strip surrounding slashes before joining so the URL does not end up with
    # a doubled slash ("/rest//src_compound_id").  The trailing "/1/3" is the
    # source/target pair of the lookup (ChEMBL -> PDB in UniChem's source
    # numbering -- confirm against the UniChem documentation if changed).
    # e.g. for ChEMBL id. CHEMBL14249 we get:
    #   https://www.ebi.ac.uk/unichem/rest/src_compound_id/CHEMBL14249/1/3
    full_url = "%s/%s/%s/1/3" % (SERVER_URL.rstrip("/"), url.strip("/"), arg)

    response = requests.get(full_url, timeout=timeout)
    try:
        # requests decodes the JSON body into plain Python objects,
        # e.g. [{'src_compound_id': 'ATP'}]
        json_results = response.json()
    except ValueError:
        # Body was not valid JSON (for instance an HTML error page).
        return "NA"

    # A successful lookup is a non-empty list of dictionaries; anything else
    # (empty list, {'error': ...} dictionary, ...) means "no ligand id".
    if not isinstance(json_results, list) or not json_results:
        return "NA"
    first_hit = json_results[0]
    if not isinstance(first_hit, dict):
        return "NA"
    return first_hit.get('src_compound_id', "NA")
#====================================================================#
def read_chembl_idlist(chemblidlistfile):
    """Read ChEMBL ids from the first tab-separated column of a text file.

    Args:
        chemblidlistfile: Path of the input file, one compound per line; any
            columns after the first tab are ignored.

    Returns:
        A list of the ids in file order.  Lines whose first column is empty
        (for example blank lines) are skipped, so no empty id is ever handed
        to the lookup step.
    """
    idlist = []
    # The context manager closes the file even if reading raises part-way.
    with open(chemblidlistfile, "r") as inputfileObj:
        for line in inputfileObj:
            chemblid = line.rstrip().split("\t")[0]
            if chemblid:
                idlist.append(chemblid)
    return idlist
#====================================================================#
if __name__ == '__main__':
    # Command-line interface: one option, -e, naming the ChEMBL id list file.
    # Running the script without -e prints the help text, e.g.:
    #   usage: unichem_rest_example_get_pdbligandids_for_chemblidlist.py [-h] [-e E]
    #   optional arguments:
    #     -h, --help  show this help message and exit
    #     -e E        the chemblID list file
    cli = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    cli.add_argument('-e', type=str, default=None, action='store', help='the chemblID list file')
    options = cli.parse_args()

    # Without an input file there is nothing to do: show the help and stop.
    if not options.e:
        cli.print_help()
        sys.exit(1)

    # Load the ChEMBL ids to look up.
    chembl_idlist = read_chembl_idlist(options.e)

    # Query UniChem for each id and print a numbered (1-based) result row.
    # UNICHEM is the endpoint path defined at the top of the script
    # ("/src_compound_id").
    for cnt, chembl_id in enumerate(chembl_idlist, 1):
        pdbligand_id = get_request(UNICHEM, chembl_id, True)
        print(cnt,chembl_id,pdbligand_id)
    print("FINISHED\n")
#====================================================================#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment