Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Script to retrieve the PDB entry ids of all PDB entries that contain each ligand in an input list of PDB ligand ids.
#!/usr/bin/env python
# example from https://github.com/PDBeurope/PDBe_Programming/blob/master/REST_API/snippets/basic_get_post.py
# edited to use the python 'requests' module, and to get the PDB entry ids for each ligand in an input list of PDB ligand ids.
import argparse
import sys
import requests # this is used to access json files
# True when running under Python 3 or later. sys.version is a free-form
# string, so compare the numeric major version rather than the text
# (a string comparison would misorder a major version such as "10").
PY3 = sys.version_info[0] >= 3
# Expose the URL-opening module under the single name 'urllib2' on both
# Python 2 and Python 3.
if PY3:
    import urllib.request as urllib2
else:
    import urllib2
# Base URL that every request in this script is built from.
SERVER_URL = "https://www.ebi.ac.uk/pdbe/api"
# API path (relative to SERVER_URL) used to look up the PDB entries for a
# ligand id.
INPDB = "/pdb/compound/in_pdb"
#====================================================================#
def get_request(url, arg, pretty=False, timeout=30):
    """Return the list stored under ``arg`` in the JSON reply for one ligand id.

    The request URL is ``SERVER_URL/<url>/<arg>?pretty=<true|false>``; for
    the ligand id 'ATP' and url INPDB this is
    https://www.ebi.ac.uk/pdbe/api/pdb/compound/in_pdb/ATP?pretty=true

    Args:
        url: API path relative to SERVER_URL (e.g. INPDB). Leading and
            trailing slashes are ignored so the joined URL never contains
            a doubled slash.
        arg: The input ligand id, e.g. 'ATP'. It is appended to the URL and
            is also the key looked up in the decoded JSON reply.
        pretty: Sent to the server as the lower-cased 'pretty' query value.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The value stored under ``arg`` in the JSON reply (the PDB ids that
        have this ligand id), or ``["NA"]`` when the server answers
        HTTP 404 or with an empty JSON document.

    Raises:
        requests.HTTPError: For any HTTP error status other than 404.
        KeyError: If a non-empty reply has no entry for ``arg``.
    """
    full_url = "%s/%s/%s?pretty=%s" % (
        SERVER_URL.rstrip("/"), url.strip("/"), arg, str(pretty).lower())
    response = requests.get(full_url, timeout=timeout)
    # In some cases unichem says a particular chembl id maps to a particular
    # PDB ligand id, but that ligand is not in any PDB entry; report "NA"
    # for those instead of failing.
    if response.status_code == 404:
        return ["NA"]
    # Any other error status is a real failure: surface it here rather than
    # as a confusing JSON-decoding or KeyError further down.
    response.raise_for_status()
    json_results = response.json()
    if not json_results:
        return ["NA"]
    return json_results[arg]
#====================================================================#
def read_ligand_idlist(ligandidlistfile):
    """Read ligand ids and their ChEMBL ids from a whitespace-separated file.

    Every non-blank line must have at least three fields; the second field
    is taken as the ChEMBL id and the third as the PDB ligand id, e.g.::

        myscriptaa.o:1 CHEMBL10 NA
        myscriptaa.o:2 CHEMBL1000 NA
        myscriptaa.o:3 CHEMBL100017 NA

    Lines whose ligand id is 'NA' are ignored, and blank lines are skipped.

    Args:
        ligandidlistfile: Path of the input file.

    Returns:
        A tuple ``(idlist, chemblid_dict)``: ``idlist`` holds the ligand ids
        in file order, and ``chemblid_dict`` maps each ligand id to the
        ChEMBL id found on the same line.

    Raises:
        ValueError: If a non-blank line has fewer than three fields, or a
            ligand id occurs on more than one line.
    """
    idlist = []
    chemblid_dict = {}
    # 'with' guarantees the file is closed even when a line is rejected.
    with open(ligandidlistfile, "r") as inputfile:
        for lineno, line in enumerate(inputfile, start=1):
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines (e.g. a trailing newline)
            if len(fields) < 3:
                raise ValueError(
                    "%s line %d: expected at least 3 fields, found %d"
                    % (ligandidlistfile, lineno, len(fields)))
            chemblid, ligandid = fields[1], fields[2]
            if ligandid == 'NA':
                continue
            # The dict holds exactly the ids already appended to idlist, so
            # one O(1) lookup detects duplicates. Raise instead of assert so
            # the check still runs under 'python -O'.
            if ligandid in chemblid_dict:
                raise ValueError(
                    "%s line %d: duplicate ligand id %r"
                    % (ligandidlistfile, lineno, ligandid))
            idlist.append(ligandid)
            chemblid_dict[ligandid] = chemblid
    return (idlist, chemblid_dict)
#====================================================================#
if __name__ == '__main__':
    # Command-line entry point. Usage:
    #   python3 pdb_rest_example_get_pdbids_with_ligandidlist.py -e E -o O
    #     -e E  the ligandIDlist
    #     -o O  the outputfile
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-e', type=str, default=None, action='store', help='the ligandIDlist')
    parser.add_argument('-o', type=str, default=None, action='store', help='the outputfile')
    args = parser.parse_args()

    # Both options are needed: without -o the script would otherwise fail
    # later on open(None, "w"), after the input file was already read.
    if not args.e or not args.o:
        parser.print_help()
        sys.exit(1)

    # now read in the list of ligand ids:
    (ligand_idlist, chemblid_dict) = read_ligand_idlist(args.e)

    # get the PDB ids for each of the ligand ids; 'with' closes the output
    # file even if one of the requests fails part-way through.
    with open(args.o, "w") as outputfileObj:
        for cnt, ligand_id in enumerate(ligand_idlist, start=1):
            # Note we defined at the top of the script that:
            # INPDB = "/pdb/compound/in_pdb"
            pdb_ids = get_request(INPDB, ligand_id, True)
            pdb_ids_string = ','.join(pdb_ids)
            chembl_id = chemblid_dict[ligand_id]
            # One line per ligand: running count, ChEMBL id, ligand id and
            # the comma-separated PDB ids; echoed to stdout as well.
            output_line = "%d %s %s %s\n" % (cnt, chembl_id, ligand_id, pdb_ids_string)
            outputfileObj.write(output_line)
            sys.stdout.write(output_line)
    print("FINISHED\n")
#====================================================================#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.