Created
May 9, 2015 01:21
-
-
Save danielparton/c825dec98c360b428de7 to your computer and use it in GitHub Desktop.
Analysis of disulfide bonds in protein kinases
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# for each template: | |
# from PDB entry: | |
# print SS bonds within template span | |
import os | |
import gzip | |
import ensembler | |
from ensembler.uniprot import get_uniprot_xml | |
from ensembler.initproject import extract_template_pdbchains_from_uniprot_xml, parse_sifts_xml | |
from ensembler.utils import set_loglevel | |
set_loglevel('error') | |
templates_resolved_seq = ensembler.core.get_templates_resolved_seq() | |
templateids = [t.id for t in templates_resolved_seq] | |
template_wordlists = [t.split('_') for t in templateids] | |
uniprot_names = ['_'.join(t[0:2]) for t in template_wordlists] | |
domainids = [int(t[2][1]) for t in template_wordlists] | |
pdbids = [t[3] for t in template_wordlists] | |
chainids = [t[4] for t in template_wordlists] | |
ssbond_iter = 0 | |
unique_uniprot_names = set() | |
for t, templateid in enumerate(templateids): | |
pdbid = pdbids[t] | |
with gzip.open(os.path.join('structures/pdb', pdbid+'.pdb.gz')) as pdbfile: | |
lines = pdbfile.read().splitlines() | |
ssbondlines = [line for line in lines if 'SSBOND' in line] | |
if len(ssbondlines) > 0: | |
for ssbondline in ssbondlines: | |
words = ssbondline.split() | |
chainid1 = words[3] | |
resid1 = int(words[4]) | |
chainid2 = words[6] | |
resid2 = int(words[7]) | |
if (chainid1 == chainid2 and chainid1 == chainids[t] and | |
resid1 != resid2 | |
): | |
# get uniprot domain span | |
uniprot_xml = get_uniprot_xml('mnemonic:{0}'.format(uniprot_names[t])) | |
pdbchains = extract_template_pdbchains_from_uniprot_xml(uniprot_xml, uniprot_domain_regex='^Protein kinase(?!; truncated)(?!; inactive)', specified_pdbids=[pdbid], specified_chainids={pdbid: [chainids[t]]}) | |
uniprot_residue_span = pdbchains[0]['residue_span'] | |
# get ssbond resids in UniProt coords | |
siftsxml = parse_sifts_xml(os.path.join('structures/sifts', pdbid+'.xml.gz')) | |
resid1_pdb_node = siftsxml.find('entity/segment/listResidue/residue/crossRefDb[@dbSource="PDB"][@dbChainId="{0}"][@dbResNum="{1}"]'.format(chainid1, resid1)) | |
resid1_in_uniprot_coords = int(resid1_pdb_node.find('../crossRefDb[@dbSource="UniProt"]').get('dbResNum')) | |
resid2_pdb_node = siftsxml.find('entity/segment/listResidue/residue/crossRefDb[@dbSource="PDB"][@dbChainId="{0}"][@dbResNum="{1}"]'.format(chainid2, resid2)) | |
resid2_in_uniprot_coords = int(resid2_pdb_node.find('../crossRefDb[@dbSource="UniProt"]').get('dbResNum')) | |
if (((resid1_in_uniprot_coords >= uniprot_residue_span[0]) and | |
(resid1_in_uniprot_coords <= uniprot_residue_span[1])) or | |
((resid2_in_uniprot_coords >= uniprot_residue_span[0]) and | |
(resid2_in_uniprot_coords <= uniprot_residue_span[1]))): | |
ssbond_iter += 1 | |
print '{0}: = {1} ='.format(ssbond_iter, templateid) | |
print ssbondline | |
print '' | |
unique_uniprot_names.add(uniprot_names[t]) | |
print 'Unique UniProt entry names:' | |
print '\n'.join(unique_uniprot_names) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment