Created
June 18, 2019 10:15
-
-
Save avrilcoghlan/b5766dcd127bd5e380a0e64cbd668065 to your computer and use it in GitHub Desktop.
Script to retrieve PDB entry ids for PDB entries that contain PDB ligand ids in an input list of PDB ligand ids.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# example from https://github.com/PDBeurope/PDBe_Programming/blob/master/REST_API/snippets/basic_get_post.py | |
# edited to use the python 'requests' module, and to get the PDB ids for an input list of PDB ligand ids.
import argparse | |
import sys | |
import requests # this is used to access json files | |
# True when running under Python 3. Compare the numeric major version rather
# than the free-form 'sys.version' string, which is only meant for display.
PY3 = sys.version_info[0] >= 3
if PY3:
    import urllib.request as urllib2
else:
    import urllib2

# Base URL that every request made by this script is built on.
SERVER_URL = "https://www.ebi.ac.uk/pdbe/api"
# Path of the API call (below SERVER_URL) that is queried with a PDB ligand id
# to get the PDB entries containing that ligand.
INPDB = "/pdb/compound/in_pdb"
#====================================================================# | |
def get_request(url, arg, pretty=False, timeout=60):
    """Fetch the PDB ids recorded for one PDB ligand id.

    The request URL is ``<SERVER_URL>/<url>/<arg>?pretty=<true|false>`` and is
    fetched with the 'requests' module; the response body is parsed as JSON.

    Args:
        url: API path below SERVER_URL, e.g. INPDB ("/pdb/compound/in_pdb").
            Leading/trailing slashes are ignored.
        arg: the input ligand id, e.g. 'ATP'. It is also the key that is
            looked up in the JSON response.
        pretty: sent to the server as the 'pretty' query parameter.
        timeout: seconds to wait for the server before giving up, so that one
            stalled connection cannot hang the whole run.

    Returns:
        The value stored under ``arg`` in the JSON response (expected to be
        the list of PDB ids that have this ligand), or ``["NA"]`` when the
        response JSON is empty.

    Raises:
        requests.exceptions.RequestException: on connection errors/timeouts.
        ValueError: if the response body is not valid JSON.
        KeyError: if the response is non-empty but has no entry for ``arg``.
    """
    # INPDB starts with '/', so strip the slashes before joining; otherwise
    # the URL comes out as '.../pdbe/api//pdb/compound/in_pdb/...'.
    full_url = "%s/%s/%s?pretty=%s" % (
        SERVER_URL.rstrip("/"),
        url.strip("/"),
        arg,
        str(pretty).lower(),
    )
    # e.g. for ligand id 'ATP' we get:
    # full_url = https://www.ebi.ac.uk/pdbe/api/pdb/compound/in_pdb/ATP?pretty=true

    # Call the API using the 'requests' module and decode the JSON body.
    json_results = requests.get(full_url, timeout=timeout).json()

    # In some cases unichem says a particular chembl id maps to a particular
    # PDB ligand id, but that ligand is not in any PDB entry: the response is
    # then empty and we report 'NA'.
    if not json_results:
        return ["NA"]

    # Pull out the list of PDB ids that have this ligand id.
    return json_results[arg]
#====================================================================# | |
def read_ligand_idlist(ligandidlistfile):
    """Read the ligand ids (and their ChEMBL ids) from a whitespace-separated file.

    Each non-blank line must have at least three fields; the second field is
    the ChEMBL id and the third is the PDB ligand id, e.g.::

        myscriptaa.o:1 CHEMBL10 NA
        myscriptaa.o:2 CHEMBL1000 NA
        myscriptaa.o:3 CHEMBL100017 NA

    Lines whose ligand id is 'NA' are skipped, as are blank lines.

    Args:
        ligandidlistfile: path of the input file.

    Returns:
        A tuple ``(idlist, chemblid_dict)``: the ligand ids in file order, and
        a dict mapping each ligand id to its ChEMBL id.

    Raises:
        ValueError: if a non-blank line has fewer than three fields, or if the
            same ligand id appears on more than one line.
    """
    idlist = []
    chemblid_dict = {}
    # 'with' guarantees the file is closed even if a line fails validation.
    with open(ligandidlistfile, "r") as inputfileObj:
        for lineno, line in enumerate(inputfileObj, start=1):
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            if len(fields) < 3:
                raise ValueError(
                    "%s line %d: expected at least 3 fields, found %d"
                    % (ligandidlistfile, lineno, len(fields))
                )
            chemblid = fields[1]
            ligandid = fields[2]
            if ligandid == 'NA':
                continue
            # The dict holds exactly the ids already in idlist, so one O(1)
            # lookup replaces the list scan. Raise explicitly rather than
            # assert, because asserts are removed when Python runs with -O.
            if ligandid in chemblid_dict:
                raise ValueError(
                    "%s line %d: ligand id %s appears more than once"
                    % (ligandidlistfile, lineno, ligandid)
                )
            idlist.append(ligandid)
            chemblid_dict[ligandid] = chemblid
    return (idlist, chemblid_dict)
#====================================================================# | |
if __name__ == '__main__':
    # Command-line entry point: read the ligand id list given with -e, look up
    # the PDB ids for every ligand id, and write one line per ligand id to the
    # file given with -o (each line is also echoed to stdout).
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-e', type=str, default=None, action='store', help='the ligandIDlist')
    parser.add_argument('-o', type=str, default=None, action='store', help='the outputfile')
    args = parser.parse_args()
    # If you type:
    # % python3 pdb_rest_example_get_pdbids_with_ligandidlist.py
    # You will see:
    # usage: pdb_rest_example_get_pdbids_with_ligandidlist.py [-h] [-e E] [-o O]
    #
    # optional arguments:
    # -h, --help show this help message and exit
    # -e E the ligandIDlist
    # -o O the outputfile

    # Both files are needed. Check -o up front as well as -e, so that a missing
    # output file shows the usage instead of failing later in open(None, "w").
    if not args.e or not args.o:
        parser.print_help()
        sys.exit(1)

    # now read in the list of ligand ids:
    (ligand_idlist, chemblid_dict) = read_ligand_idlist(args.e)

    # get the PDB ids for each of the ligand ids; 'with' closes the output
    # file even if one of the requests raises part-way through the list.
    with open(args.o, "w") as outputfileObj:
        for cnt, ligand_id in enumerate(ligand_idlist, start=1):
            # Note we defined at the top of the script that:
            # INPDB = "/pdb/compound/in_pdb"
            pdb_ids = get_request(INPDB, ligand_id, True)
            pdb_ids_string = ','.join(pdb_ids)
            # Every id in ligand_idlist is a key of chemblid_dict (both are
            # filled together by read_ligand_idlist).
            chembl_id = chemblid_dict[ligand_id]
            output_line = "%d %s %s %s\n" % (cnt, chembl_id, ligand_id, pdb_ids_string)
            outputfileObj.write(output_line)
            sys.stdout.write(output_line)
    print("FINISHED\n")
#====================================================================# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment