Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save avrilcoghlan/04c78d57676b0c36c19b5b170914dd2f to your computer and use it in GitHub Desktop.
Save avrilcoghlan/04c78d57676b0c36c19b5b170914dd2f to your computer and use it in GitHub Desktop.
script to retrieve UniProt ids for an input list of PDB ids.
#!/usr/bin/env python
# example from https://github.com/PDBeurope/PDBe_Programming/blob/master/REST_API/snippets/basic_get_post.py
# Edited to use the Python 'requests' module and to look up the UniProt ids for given PDBe entry ids.
import argparse
import sys
import requests # this is used to access json files
# Decide which urllib flavour to import from the interpreter's major version.
# sys.version_info is compared numerically; comparing the sys.version *string*
# against '3' is lexicographic and would be wrong for a major version >= 10.
PY3 = sys.version_info[0] >= 3
if PY3:
    import urllib.request as urllib2
else:
    import urllib2

# Base URL that every request in this script is built on.
SERVER_URL = "https://www.ebi.ac.uk/pdbe/api"
# API path (relative to SERVER_URL) used to look up UniProt mappings.
UNIPROT = "/mappings/uniprot"
#====================================================================#
def get_request(url, arg, pretty=False, timeout=30):
    """Return the UniProt ids mapped to one PDB entry as a comma-separated string.

    Sends a GET request to ``SERVER_URL/<url>/<arg>?pretty=<pretty>`` and reads
    the keys of the ``"UniProt"`` section of the JSON reply.

    Args:
        url: API path relative to SERVER_URL, e.g. "/mappings/uniprot".
            Leading and trailing slashes are ignored.
        arg: the PDB id to look up, e.g. "1ivv".
        pretty: forwarded (lower-cased) as the ``pretty`` query parameter.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The UniProt ids joined with ',' (one PDB entry can map to several,
        e.g. 2zwe). An empty string is returned when the server answers 404
        or the reply holds no "UniProt" section for this entry.

    Raises:
        requests.HTTPError: the server answered with an error status other
            than 404.
        requests.RequestException: the request failed or timed out.
    """
    # Strip the slashes around each part so the joined URL has no '//' in its
    # path, e.g. https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/1ivv?pretty=true
    full_url = "%s/%s/%s?pretty=%s" % (
        SERVER_URL.rstrip("/"), url.strip("/"), arg, str(pretty).lower())

    # Always pass a timeout: without one a stalled server blocks the whole batch.
    response = requests.get(full_url, timeout=timeout)

    # NOTE(review): 404 is treated as "no mapping for this entry" so that one
    # unknown id does not abort a long run -- confirm against the PDBe API docs.
    if response.status_code == 404:
        return ""
    response.raise_for_status()
    json_results = response.json()

    # The reply is keyed by the PDB id. NOTE(review): the service appears to
    # key by the lower-case id, so fall back to that for upper-case input.
    entry = json_results.get(arg) or json_results.get(arg.lower()) or {}

    # The keys of the "UniProt" section are the UniProt ids for this entry.
    uniprot_ids = entry.get("UniProt") or {}
    return ','.join(uniprot_ids)
#====================================================================#
def read_pdb_idlist(pdbidlistfile, outputfile):
    """Annotate every PDB id in the input file with its UniProt ids.

    Each input line holds four whitespace-separated columns: a counter, a
    ChEMBL id, a ligand id and a comma-separated list of PDB ids, e.g.

        1 CHEMBL1009 DAH 1ivv,1rnr,2vh3
        2 CHEMBL101 P1Z 2bxc,2bxp,2bxq,2w98
        3 CHEMBL101683 8ST 3hng

    For each line whose ligand id is not 'NA', the same four columns are
    written to ``outputfile`` (and echoed to stdout) with every PDB id
    rewritten as ``pdbid(uniprot_ids)``.

    NOTE(review): the original indentation was lost; this assumes lines with
    ligand id 'NA' produce no output at all -- confirm.

    Args:
        pdbidlistfile: path of the input file described above.
        outputfile: path of the file to write; it is overwritten.

    Returns:
        None.

    Raises:
        IndexError: a non-blank line has too few columns.
    """
    # 'with' closes both files even if a lookup fails part-way through.
    with open(pdbidlistfile, "r") as inputfileObj, \
            open(outputfile, "w") as outputfileObj:
        for line in inputfileObj:
            temp = line.split()
            if not temp:
                continue  # tolerate blank lines, e.g. a trailing newline
            cnt = temp[0]
            chembl_id = temp[1]
            ligand_id = temp[2]
            if ligand_id == 'NA':
                continue  # no ligand, so there are no PDB ids to look up
            pdbids = temp[3]
            # note that one pdbid can contain more than one ChEMBL compound,
            # e.g. https://www.ebi.ac.uk/pdbe/entry/pdb/2bxp
            # get the UniProt ids for each pdbid and join without a trailing ','
            pdbidlist2 = ",".join(
                "%s(%s)" % (pdbid, get_request(UNIPROT, pdbid, True))
                for pdbid in pdbids.split(','))
            output_line = "%s %s %s %s\n" % (cnt, chembl_id, ligand_id, pdbidlist2)
            outputfileObj.write(output_line)
            sys.stdout.write(output_line)
    return
#====================================================================#
if __name__ == '__main__':
    # Command-line entry point: -e names the input list, -o the output file.
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-e', type=str, default=None, action='store', help='the pdbidlistfile')
    parser.add_argument('-o', type=str, default=None, action='store', help='the outputfile')
    args = parser.parse_args()
    # If you type:
    # % python3 pdb_rest_example_get_uniprot_for_pdbidlist.py
    # You will see:
    # usage: pdb_rest_example_get_uniprot_for_pdbidlist.py [-h] [-e E] [-o O]
    #
    # optional arguments:
    # -h, --help show this help message and exit
    # -e E the pdbidlistfile
    # -o O the outputfile
    # Note we defined at the top of the script that:
    # UNIPROT = "/mappings/uniprot"

    # Both files are needed: without -o the output path would be None and
    # open() would fail with a TypeError, so show the help and stop instead.
    if not (args.e and args.o):
        parser.print_help()
        sys.exit(1)

    # now read in the list of pdb ids, and find their uniprot ids:
    read_pdb_idlist(args.e, args.o)
    print("FINISHED\n")
#====================================================================#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment