Skip to content

Instantly share code, notes, and snippets.

@danielparton
Created May 9, 2015 01:21
Show Gist options
  • Save danielparton/c825dec98c360b428de7 to your computer and use it in GitHub Desktop.
Save danielparton/c825dec98c360b428de7 to your computer and use it in GitHub Desktop.
Analysis of disulfide bonds in protein kinases
# for each template:
# from PDB entry:
# print SS bonds within template span
import os
import gzip
import ensembler
from ensembler.uniprot import get_uniprot_xml
from ensembler.initproject import extract_template_pdbchains_from_uniprot_xml, parse_sifts_xml
from ensembler.utils import set_loglevel
set_loglevel('error')
templates_resolved_seq = ensembler.core.get_templates_resolved_seq()
templateids = [t.id for t in templates_resolved_seq]
template_wordlists = [t.split('_') for t in templateids]
uniprot_names = ['_'.join(t[0:2]) for t in template_wordlists]
domainids = [int(t[2][1]) for t in template_wordlists]
pdbids = [t[3] for t in template_wordlists]
chainids = [t[4] for t in template_wordlists]
ssbond_iter = 0
unique_uniprot_names = set()
for t, templateid in enumerate(templateids):
pdbid = pdbids[t]
with gzip.open(os.path.join('structures/pdb', pdbid+'.pdb.gz')) as pdbfile:
lines = pdbfile.read().splitlines()
ssbondlines = [line for line in lines if 'SSBOND' in line]
if len(ssbondlines) > 0:
for ssbondline in ssbondlines:
words = ssbondline.split()
chainid1 = words[3]
resid1 = int(words[4])
chainid2 = words[6]
resid2 = int(words[7])
if (chainid1 == chainid2 and chainid1 == chainids[t] and
resid1 != resid2
):
# get uniprot domain span
uniprot_xml = get_uniprot_xml('mnemonic:{0}'.format(uniprot_names[t]))
pdbchains = extract_template_pdbchains_from_uniprot_xml(uniprot_xml, uniprot_domain_regex='^Protein kinase(?!; truncated)(?!; inactive)', specified_pdbids=[pdbid], specified_chainids={pdbid: [chainids[t]]})
uniprot_residue_span = pdbchains[0]['residue_span']
# get ssbond resids in UniProt coords
siftsxml = parse_sifts_xml(os.path.join('structures/sifts', pdbid+'.xml.gz'))
resid1_pdb_node = siftsxml.find('entity/segment/listResidue/residue/crossRefDb[@dbSource="PDB"][@dbChainId="{0}"][@dbResNum="{1}"]'.format(chainid1, resid1))
resid1_in_uniprot_coords = int(resid1_pdb_node.find('../crossRefDb[@dbSource="UniProt"]').get('dbResNum'))
resid2_pdb_node = siftsxml.find('entity/segment/listResidue/residue/crossRefDb[@dbSource="PDB"][@dbChainId="{0}"][@dbResNum="{1}"]'.format(chainid2, resid2))
resid2_in_uniprot_coords = int(resid2_pdb_node.find('../crossRefDb[@dbSource="UniProt"]').get('dbResNum'))
if (((resid1_in_uniprot_coords >= uniprot_residue_span[0]) and
(resid1_in_uniprot_coords <= uniprot_residue_span[1])) or
((resid2_in_uniprot_coords >= uniprot_residue_span[0]) and
(resid2_in_uniprot_coords <= uniprot_residue_span[1]))):
ssbond_iter += 1
print '{0}: = {1} ='.format(ssbond_iter, templateid)
print ssbondline
print ''
unique_uniprot_names.add(uniprot_names[t])
print 'Unique UniProt entry names:'
print '\n'.join(unique_uniprot_names)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment