avrilcoghlan/"pdb_rest_example_get_pdbids_for_ligandidlist.py

## pdb_rest_example_get_pdbids_for_ligandidlist.py
#!/usr/bin/env python
# example from https://github.com/PDBeurope/PDBe_Programming/blob/master/REST_API/snippets/basic_get_post.py
# edited to use the python 'requests' module, and to get the PDB ids. for an input list of PDB ligand ids.

import argparse
import sys
import requests # this is used to access json files

# True when running under Python 3 or later. Compare the numeric major
# version: comparing the sys.version *string* is lexicographic and would give
# the wrong answer for a two-digit major version.
PY3 = sys.version_info[0] >= 3

# Expose the urllib API under the Python 2 name 'urllib2' on both versions.
if PY3:
    import urllib.request as urllib2
else:
    import urllib2

# Base URL of the PDBe REST API.
SERVER_URL = "https://www.ebi.ac.uk/pdbe/api"

# API path (below SERVER_URL) that lists the PDB entries containing a ligand.
INPDB = "/pdb/compound/in_pdb"

#====================================================================#

def get_request(url, arg, pretty=False, timeout=60):
    """Return the PDB entry ids that contain a given PDB ligand.

    Sends a GET request to ``<SERVER_URL>/<url>/<arg>?pretty=<pretty>`` with
    the 'requests' module and reads the list stored under the ``arg`` key of
    the JSON reply.

    Args:
        url: API path below SERVER_URL, e.g. "/pdb/compound/in_pdb".
            Leading and trailing slashes are ignored.
        arg: the PDB ligand id to look up, e.g. 'ATP'.
        pretty: sent to the server as the ``pretty`` query parameter
            ("true" or "false").
        timeout: seconds to wait for the server; passed to requests.get so
            that a stalled connection raises instead of blocking forever.

    Returns:
        The value stored under ``arg`` in the JSON reply (the list of PDB
        ids), or ["NA"] when the reply is empty.

    Raises:
        KeyError: if the reply is non-empty but has no entry for ``arg``.
        requests.exceptions.RequestException: on connection failure/timeout.
    """
    # Strip the slashes before joining so that a path such as
    # "/pdb/compound/in_pdb" yields ".../api/pdb/compound/in_pdb/ATP"
    # rather than ".../api//pdb/compound/in_pdb/ATP".
    full_url = "%s/%s/%s?pretty=%s" % (
        SERVER_URL.rstrip("/"), url.strip("/"), arg, str(pretty).lower())

    # Call the API and decode the JSON body of the reply.
    json_results = requests.get(full_url, timeout=timeout).json()

    # In some cases unichem says a particular chembl id. maps to a particular
    # PDB ligand id, but that ligand is not in any PDB entry: the reply is
    # then empty.
    if not json_results:
        return ["NA"]

    # Pull out the list of PDB ids that have this ligand id.
    return json_results[arg]

#====================================================================#

def read_ligand_idlist(ligandidlistfile):
    """Read the ChEMBL id / PDB ligand id pairs from the input file.

    Every non-blank line must hold at least three whitespace-separated
    fields; the second is the ChEMBL id and the third is the PDB ligand id,
    e.g.:

        myscriptaa.o:1 CHEMBL10 NA
        myscriptaa.o:2 CHEMBL1000 NA
        myscriptaa.o:3 CHEMBL100017 NA

    Lines whose ligand id is 'NA' are ignored, as are blank lines.

    Args:
        ligandidlistfile: path of the file to read.

    Returns:
        A tuple (idlist, chemblid_dict): the ligand ids in file order, and a
        dict mapping each ligand id to its ChEMBL id.

    Raises:
        ValueError: if a line has fewer than three fields, or if the same
            ligand id appears on more than one line.
    """
    idlist = []
    chemblid_dict = {}

    # 'with' guarantees the file is closed even if a line fails to parse.
    with open(ligandidlistfile, "r") as inputfileObj:
        for lineno, line in enumerate(inputfileObj, start=1):
            temp = line.split()
            if not temp:
                continue  # blank line
            if len(temp) < 3:
                raise ValueError(
                    "%s line %d: expected at least 3 fields, found %d"
                    % (ligandidlistfile, lineno, len(temp)))
            chemblid = temp[1]
            ligandid = temp[2]
            if ligandid == 'NA':
                continue
            # The dict holds exactly the ids in idlist, so one O(1) lookup
            # detects a duplicate.
            if ligandid in chemblid_dict:
                raise ValueError(
                    "%s line %d: duplicate ligand id %s"
                    % (ligandidlistfile, lineno, ligandid))
            idlist.append(ligandid)
            chemblid_dict[ligandid] = chemblid

    return (idlist, chemblid_dict)

#====================================================================#

if __name__ == '__main__':
    # Command-line entry point.
    # Usage: python3 <this script> -e <ligandIDlist file> -o <output file>
    # For every ligand id in the input file, one line is written to the
    # output file (and echoed to stdout):
    #   <counter> <ChEMBL id> <ligand id> <comma-separated PDB ids>
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-e', type=str, default=None, action='store', help='the ligandIDlist')
    parser.add_argument('-o', type=str, default=None, action='store', help='the outputfile')
    args = parser.parse_args()

    # Both files are required. Check before doing any work, so a missing
    # output file is reported as a usage error rather than failing inside
    # open() after the input has already been read.
    if not (args.e and args.o):
        parser.print_help()
        sys.exit(1)

    # now read in the list of ligand ids:
    (ligand_idlist, chemblid_dict) = read_ligand_idlist(args.e)

    # get the PDB ids for each of the ligand ids; 'with' closes the output
    # file even if a request fails part-way through:
    with open(args.o, "w") as outputfileObj:
        for cnt, ligand_id in enumerate(ligand_idlist, start=1):
            # Note we defined at the top of the script that:
            # INPDB = "/pdb/compound/in_pdb"
            pdb_ids = get_request(INPDB, ligand_id, True)
            pdb_ids_string = ','.join(pdb_ids)
            chembl_id = chemblid_dict[ligand_id]
            output_line = "%d %s %s %s\n" % (cnt, chembl_id, ligand_id, pdb_ids_string)
            outputfileObj.write(output_line)
            sys.stdout.write(output_line)

    print("FINISHED\n")

#====================================================================#
	#!/usr/bin/env python
	# example from https://github.com/PDBeurope/PDBe_Programming/blob/master/REST_API/snippets/basic_get_post.py
	# edited to use the python 'requests' module, and to get the PDB ids. for an input list of PDB ligand ids.

	import argparse
	import sys
	import requests # this is used to access json files

	# True when running under Python 3 or later. Compare the numeric major
	# version: comparing the sys.version *string* is lexicographic and would
	# give the wrong answer for a two-digit major version.
	PY3 = sys.version_info[0] >= 3

	# Expose the urllib API under the Python 2 name 'urllib2' on both versions.
	if PY3:
		import urllib.request as urllib2
	else:
		import urllib2

	# Base URL of the PDBe REST API.
	SERVER_URL = "https://www.ebi.ac.uk/pdbe/api"

	# API path (below SERVER_URL) that lists the PDB entries containing a ligand.
	INPDB = "/pdb/compound/in_pdb"

	#====================================================================#

	def get_request(url, arg, pretty=False, timeout=60):
		"""Return the PDB entry ids that contain a given PDB ligand.

		Sends a GET request to ``<SERVER_URL>/<url>/<arg>?pretty=<pretty>``
		with the 'requests' module and reads the list stored under the
		``arg`` key of the JSON reply.

		Args:
			url: API path below SERVER_URL, e.g. "/pdb/compound/in_pdb".
				Leading and trailing slashes are ignored.
			arg: the PDB ligand id to look up, e.g. 'ATP'.
			pretty: sent to the server as the ``pretty`` query parameter
				("true" or "false").
			timeout: seconds to wait for the server; passed to requests.get
				so that a stalled connection raises instead of blocking.

		Returns:
			The value stored under ``arg`` in the JSON reply (the list of
			PDB ids), or ["NA"] when the reply is empty.

		Raises:
			KeyError: if the reply is non-empty but has no entry for ``arg``.
			requests.exceptions.RequestException: on connection
				failure/timeout.
		"""
		# Strip the slashes before joining so that a path such as
		# "/pdb/compound/in_pdb" yields ".../api/pdb/compound/in_pdb/ATP"
		# rather than ".../api//pdb/compound/in_pdb/ATP".
		full_url = "%s/%s/%s?pretty=%s" % (
			SERVER_URL.rstrip("/"), url.strip("/"), arg, str(pretty).lower())

		# Call the API and decode the JSON body of the reply.
		json_results = requests.get(full_url, timeout=timeout).json()

		# In some cases unichem says a particular chembl id. maps to a
		# particular PDB ligand id, but that ligand is not in any PDB entry:
		# the reply is then empty.
		if not json_results:
			return ["NA"]

		# Pull out the list of PDB ids that have this ligand id.
		return json_results[arg]

	#====================================================================#

	def read_ligand_idlist(ligandidlistfile):
		"""Read the ChEMBL id / PDB ligand id pairs from the input file.

		Every non-blank line must hold at least three whitespace-separated
		fields; the second is the ChEMBL id and the third is the PDB ligand
		id, e.g.:

			myscriptaa.o:1 CHEMBL10 NA
			myscriptaa.o:2 CHEMBL1000 NA
			myscriptaa.o:3 CHEMBL100017 NA

		Lines whose ligand id is 'NA' are ignored, as are blank lines.

		Args:
			ligandidlistfile: path of the file to read.

		Returns:
			A tuple (idlist, chemblid_dict): the ligand ids in file order,
			and a dict mapping each ligand id to its ChEMBL id.

		Raises:
			ValueError: if a line has fewer than three fields, or if the
				same ligand id appears on more than one line.
		"""
		idlist = []
		chemblid_dict = {}

		# 'with' guarantees the file is closed even if a line fails to parse.
		with open(ligandidlistfile, "r") as inputfileObj:
			for lineno, line in enumerate(inputfileObj, start=1):
				temp = line.split()
				if not temp:
					continue  # blank line
				if len(temp) < 3:
					raise ValueError(
						"%s line %d: expected at least 3 fields, found %d"
						% (ligandidlistfile, lineno, len(temp)))
				chemblid = temp[1]
				ligandid = temp[2]
				if ligandid == 'NA':
					continue
				# The dict holds exactly the ids in idlist, so one O(1)
				# lookup detects a duplicate.
				if ligandid in chemblid_dict:
					raise ValueError(
						"%s line %d: duplicate ligand id %s"
						% (ligandidlistfile, lineno, ligandid))
				idlist.append(ligandid)
				chemblid_dict[ligandid] = chemblid

		return (idlist, chemblid_dict)

	#====================================================================#

	if __name__ == '__main__':
		# Command-line entry point.
		# Usage: python3 <this script> -e <ligandIDlist file> -o <output file>
		# For every ligand id in the input file, one line is written to the
		# output file (and echoed to stdout):
		#   <counter> <ChEMBL id> <ligand id> <comma-separated PDB ids>
		parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
		parser.add_argument('-e', type=str, default=None, action='store', help='the ligandIDlist')
		parser.add_argument('-o', type=str, default=None, action='store', help='the outputfile')
		args = parser.parse_args()

		# Both files are required. Check before doing any work, so a missing
		# output file is reported as a usage error rather than failing inside
		# open() after the input has already been read.
		if not (args.e and args.o):
			parser.print_help()
			sys.exit(1)

		# now read in the list of ligand ids:
		(ligand_idlist, chemblid_dict) = read_ligand_idlist(args.e)

		# get the PDB ids for each of the ligand ids; 'with' closes the
		# output file even if a request fails part-way through:
		with open(args.o, "w") as outputfileObj:
			for cnt, ligand_id in enumerate(ligand_idlist, start=1):
				# Note we defined at the top of the script that:
				# INPDB = "/pdb/compound/in_pdb"
				pdb_ids = get_request(INPDB, ligand_id, True)
				pdb_ids_string = ','.join(pdb_ids)
				chembl_id = chemblid_dict[ligand_id]
				output_line = "%d %s %s %s\n" % (cnt, chembl_id, ligand_id, pdb_ids_string)
				outputfileObj.write(output_line)
				sys.stdout.write(output_line)

		print("FINISHED\n")

	#====================================================================#