Skip to content

Instantly share code, notes, and snippets.

@JoaoRodrigues
Created March 19, 2015 17:10
Show Gist options
  • Save JoaoRodrigues/3442d0c404f1feb1c0cb to your computer and use it in GitHub Desktop.
Save JoaoRodrigues/3442d0c404f1feb1c0cb to your computer and use it in GitHub Desktop.
Calculates per residue displacement in an ensemble of structures (based on CA atoms)
#!/usr/bin/env python
"""
Analyzes the RMSD on a residue basis for an ensemble
of protein structures.
"""
from __future__ import print_function, division
import argparse
import os
import sys
import numpy as np
from Bio.PDB import PDBParser, Superimposer, PDBIO
from Bio.PDB.Polypeptide import is_aa
if __name__ == '__main__':
ap = argparse.ArgumentParser(description=__doc__)
ap.add_argument('ensemble', type=str,
help='PDB file of the ensemble of structures')
cmd = ap.parse_args()
pdbf = cmd.ensemble
if not os.path.isfile(pdbf):
raise Exception('Could not read PDB file: {0}'.format(pdbf))
# Parse ensemble
p = PDBParser(QUIET=1)
s = p.get_structure('xyz', os.path.abspath(pdbf))
print('[+] Read PDB file: {0}'.format(pdbf))
# Get reference atoms (first model)
# Use CA only
refe = s[0]
refe_atoms = [res['CA'] for res in refe.get_residues() if is_aa(res)]
print('[+] Calculating r.m.s. on {0} CA atoms'.format(len(refe_atoms)))
# Superimpose all structures on the first model
si = Superimposer()
ca_distances = []
for model in s:
model_atoms = [res['CA'] for res in model.get_residues() if is_aa(res)]
si.set_atoms(refe_atoms, model_atoms)
if model.id == refe.id:
#Check for self/self get zero RMS, zero translation
#and identity matrix for the rotation.
assert np.abs(si.rms) < 0.0000001
assert np.max(np.abs(si.rotran[1])) < 0.000001
assert np.max(np.abs(si.rotran[0]) - np.identity(3)) < 0.000001
else:
si.apply(model.get_atoms())
print('[+] Model {0} r.m.s. = {1:3.2f}'.format(model.id+1, si.rms))
# For every CA atom calculate distance to reference equivalent CA
ca_distances.append( [i-j for i,j in zip(refe_atoms, model_atoms)] )
# Calculate average displacement
n_models = len(ca_distances)
ca_distances = zip(*ca_distances)
ca_rmsd = [(sum(map(lambda x: x**2, i))/n_models)**0.5 for i in ca_distances]
# Normalize displacement
max_rmsd = max(ca_rmsd)
min_rmsd = min(ca_rmsd)
norm_ca_rmsd = [(rmsd-min_rmsd)/max_rmsd for rmsd in ca_rmsd]
# Load as bfactors in first model
for residue, res_displacement in zip(refe.get_residues(), norm_ca_rmsd):
if is_aa(residue):
new_bfactor = res_displacement
else:
new_bfactor = 0.0
for atom in residue:
atom.bfactor = new_bfactor
# Save refe as new PDB file
io = PDBIO()
io.set_structure(refe)
io.save('{0}.pdb'.format(pdbf[:-4]+'_per-residue-rmsd'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment