Skip to content

Instantly share code, notes, and snippets.

@sjcockell
Created October 7, 2010 15:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sjcockell/615292 to your computer and use it in GitHub Desktop.
Save sjcockell/615292 to your computer and use it in GitHub Desktop.
import urllib
import os, os.path
from optparse import OptionParser
def main(superfamily):
    """Download the structure of every domain in one CATH superfamily.

    superfamily -- dotted superfamily identifier string; it is handed
    unchanged to get_domain_list() and get_domain_structures().
    """
    # look up the member domains in the CathDomainList, then retrieve
    # the PDB file for each of them from CATH
    get_domain_structures(get_domain_list(superfamily), superfamily)
def get_domain_list(superfamily):
    """Return the names of all domains that belong to a CATH superfamily.

    Reads the file data/cath/CathDomainList (relative to the current
    working directory). Lines starting with '#' are comments. On every
    other line the first whitespace-separated column is the domain name
    and columns 2-5 are compared, as integers, against the four numbers
    of the requested superfamily.

    superfamily -- dotted identifier string such as "1.10.8.10".

    Returns a list of domain names in file order, and prints a one-line
    summary with the number of matches. An identifier that does not
    have exactly four parts matches nothing.

    Raises ValueError if a part of `superfamily` (or one of the compared
    columns) is not an integer, and IOError/OSError if the list file
    cannot be opened.
    """
    # Parse the requested C.A.T.H numbers once instead of once per line.
    wanted = [int(part) for part in superfamily.split('.')]
    domain_list = []
    # `with` guarantees the handle is closed even if parsing fails.
    with open(os.path.join("data", "cath", "CathDomainList"), 'r') as fh:
        # Iterate lazily; the domain list can be large.
        for line in fh:
            if line.startswith('#'):  # exclude comment lines
                continue
            tokens = line.split()
            # Blank or truncated lines cannot describe a domain; skip
            # them rather than failing with an IndexError.
            if len(tokens) < 5:
                continue
            # if C, A, T and H match, the domain is a member of the
            # right superfamily
            if [int(tok) for tok in tokens[1:5]] == wanted:
                domain_list.append(tokens[0])
    print("There are "+str(len(domain_list))+" domains in superfamily "+superfamily)
    return domain_list
def get_domain_structures(domain_list, superfamily):
    """Download the PDB file of each domain and store it on disk.

    For every name in `domain_list` the CATH URL
    http://www.cathdb.info/api/data/pdb/<domain> is fetched and the
    response body is written to data/pdb/<superfamily>/<domain>.pdb
    (relative to the current working directory). The output directory,
    including missing parents, is created on first use.

    domain_list -- iterable of domain name strings.
    superfamily -- dotted identifier string, used as the directory name.

    Returns the URL of the last domain fetched, or None when
    `domain_list` is empty.
    """
    # Function-scope import so the block runs on Python 2 and Python 3
    # without touching the file-level `import urllib`.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    out_dir = os.path.join('data', 'pdb', superfamily)
    url = None  # stays None when there is nothing to download
    for domain in domain_list:
        #can also get chain and full pdb entries by modifying the URL
        url = 'http://www.cathdb.info/api/data/pdb/'+domain
        response = urlopen(url)
        try:
            pdb = response.read()
        finally:
            # release the connection even if the read fails
            response.close()
        # makedirs (not mkdir) so a missing data/pdb parent is created too
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        # binary mode: the payload is written exactly as received
        with open(os.path.join(out_dir, domain+'.pdb'), 'wb') as out:
            out.write(pdb)
    return url
if __name__ == '__main__':
    # Command-line entry point: the four levels of the CATH hierarchy are
    # given as separate options and joined into a dotted superfamily id.
    parser = OptionParser(usage="Usage: %prog [options]", version="%prog 0.1")
    parser.add_option("-c", dest="c", help="Class", metavar="CLASS")
    parser.add_option("-a", dest="a", help="Architecture", metavar="ARCHITECTURE")
    parser.add_option("-t", dest="t", help="Topology", metavar="TOPOLOGY")
    #would have used -h, but it's reserved for help
    parser.add_option("-s", dest="s", help="Homologous Superfamily", metavar="SUPERFAMILY")
    (options, args) = parser.parse_args()
    levels = [options.c, options.a, options.t, options.s]
    # An omitted option is None; report a usage error instead of crashing
    # with a TypeError while building the identifier.
    if not all(levels):
        parser.error("all of -c, -a, -t and -s are required")
    main('.'.join(levels))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment