Created
June 18, 2019 11:18
-
-
Save avrilcoghlan/04c78d57676b0c36c19b5b170914dd2f to your computer and use it in GitHub Desktop.
script to retrieve UniProt ids for an input list of PDB ids.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Example adapted from https://github.com/PDBeurope/PDBe_Programming/blob/master/REST_API/snippets/basic_get_post.py
# Edited to use the Python 'requests' module, and to get the UniProt ids for particular PDBe entry ids.
import argparse | |
import sys | |
import requests # this is used to access json files | |
# Compare the numeric major version rather than the version *string*:
# a lexicographic test such as sys.version > '3' would be False for a
# version string beginning with "10".
PY3 = sys.version_info[0] >= 3
if PY3:
    import urllib.request as urllib2
else:
    import urllib2

# Base address of the PDBe REST API.
SERVER_URL = "https://www.ebi.ac.uk/pdbe/api"
# API path (below SERVER_URL) used to look up the UniProt mappings of a PDB entry.
UNIPROT = "/mappings/uniprot"
#====================================================================# | |
def get_request(url, arg, pretty=False):
    """Return the UniProt ids mapped to one PDB entry as a comma-joined string.

    Args:
        url: API path below SERVER_URL, e.g. "/mappings/uniprot".
        arg: the PDB entry id to look up, e.g. "1ivv".
        pretty: sent (lower-cased) as the ``pretty`` query parameter.

    Returns:
        The UniProt ids found under the entry's "UniProt" key, joined with
        ','. A PDB entry can map to more than one UniProt id (e.g.
        https://www.ebi.ac.uk/pdbe/entry/pdb/2zwe). An empty string is
        returned when the response contains no UniProt mapping for the entry,
        so that one unmapped id does not abort a whole batch run.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    # Strip the slashes around each part so that joining them does not yield
    # ".../api//mappings/uniprot/1ivv" (double slash).
    full_url = "%s/%s/%s?pretty=%s" % (
        SERVER_URL.rstrip("/"),
        url.strip("/"),
        arg,
        str(pretty).lower(),
    )
    # e.g. for PDB id 1ivv:
    #   https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/1ivv?pretty=true

    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(full_url, timeout=30)
    json_results = response.json()

    # The result is keyed by the PDB id; also try the lower-cased id in case
    # the service normalises the case of the key (TODO confirm against the API).
    entry = json_results.get(arg) or json_results.get(arg.lower()) or {}
    uniprot_map = entry.get("UniProt") or {}

    # The keys of the "UniProt" mapping are the UniProt ids.
    return ','.join(uniprot_map)
#====================================================================# | |
def read_pdb_idlist(pdbidlistfile, outputfile):
    """Annotate every PDB id in a list file with its UniProt ids.

    Each input line has four whitespace-separated columns: a counter, a
    ChEMBL id, a ligand id and a comma-separated list of PDB ids, e.g.::

        1 CHEMBL1009 DAH 1ivv,1rnr,2vh3
        2 CHEMBL101 P1Z 2bxc,2bxp,2bxq,2w98
        3 CHEMBL101683 8ST 3hng

    For every line whose ligand id is not 'NA', the same columns are written
    to ``outputfile`` (and echoed to stdout) with each PDB id rewritten as
    ``pdbid(uniprot_ids)``. Lines with ligand id 'NA' and blank lines produce
    no output.

    Args:
        pdbidlistfile: path of the input list file.
        outputfile: path of the file to write; it is overwritten.

    Returns:
        None.
    """
    # 'with' guarantees both files are closed, even if a lookup fails midway.
    with open(pdbidlistfile, "r") as infile, open(outputfile, "w") as outfile:
        for line in infile:
            fields = line.split()
            if not fields:
                # Tolerate blank lines instead of failing on fields[0].
                continue
            cnt = fields[0]
            chembl_id = fields[1]
            ligand_id = fields[2]
            pdbids = fields[3]
            if ligand_id == 'NA':
                continue
            # Note that one PDB id can contain more than one ChEMBL compound,
            # e.g. https://www.ebi.ac.uk/pdbe/entry/pdb/2bxp
            # Get the UniProt ids for each pdbid and render it as "pdbid(ids)".
            annotated = ",".join(
                "%s(%s)" % (pdbid, get_request(UNIPROT, pdbid, True))
                for pdbid in pdbids.split(',')
            )
            output_line = "%s %s %s %s\n" % (cnt, chembl_id, ligand_id, annotated)
            outfile.write(output_line)
            sys.stdout.write(output_line)
    return
#====================================================================# | |
if __name__ == '__main__':
    # Command-line entry point: read the PDB id list given with -e and write
    # the UniProt-annotated list to the file given with -o.
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-e', type=str, default=None, action='store', help='the pdbidlistfile')
    parser.add_argument('-o', type=str, default=None, action='store', help='the outputfile')
    args = parser.parse_args()
    # If you type:
    # % python3 pdb_rest_example_get_uniprot_for_pdbidlist.py
    # You will see:
    # usage: pdb_rest_example_get_uniprot_for_pdbidlist.py [-h] [-e E] [-o O]
    #
    # optional arguments:
    #   -h, --help  show this help message and exit
    #   -e E        the pdbidlistfile
    #   -o O        the outputfile

    # Both options are needed: without -o the output path would be None and
    # opening it for writing would fail with a TypeError.
    if not (args.e and args.o):
        parser.print_help()
        sys.exit(1)

    # Now read in the list of pdb ids, and find their uniprot ids:
    read_pdb_idlist(args.e, args.o)
    print("FINISHED\n")
#====================================================================# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment