Skip to content

Instantly share code, notes, and snippets.

@orbeckst
Last active August 27, 2018 16:59
Show Gist options
  • Save orbeckst/26081375f3ea3152f08bbcc90c14c5eb to your computer and use it in GitHub Desktop.
Save orbeckst/26081375f3ea3152f08bbcc90c14c5eb to your computer and use it in GitHub Desktop.
benchmarking dihedral featurization in MDAnalysis
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
#!/usr/bin/env python
import os
import warnings
import sys
import time
from collections import OrderedDict
from itertools import chain
import numpy as np
import MDAnalysis as mda
import MDAnalysis.analysis.dihedrals
class StopWatch(OrderedDict):
fmt = "{0:40s} {1:10.4f} s"
def tic(self, label):
if label in self:
raise ValueError("label {} already exists".format(label))
self[label] = time.time()
def show(self):
print("--------------------------[ TIMING ]--------------------")
labels = list(self.keys())
start = labels[0]
for stop in labels[1:]:
dt = self[stop] - self[start]
print(self.fmt.format(stop, dt))
start = stop
print("".join(56*["-"]))
print(self.fmt.format("total time",
self[labels[-1]] - self[labels[0]]))
print("".join(56*["-"]))
def residues_to_dihedral_groups(residues):
"""Return list of [phi1, psi1, phi2, psi2, ...] atom groups"""
# flatten the list, see https://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python
return list(chain.from_iterable(
(res.phi_selection(), res.psi_selection())
for res in residues))
def convert_trajectory(top, trj_in, trj_out, force=False):
if os.path.exists(trj_out) and not force:
warnings.warn("output trajectory {} exists, no conversion".format(trj_out))
return trj_out
u = mda.Universe(top, trj_in)
with mda.Writer(trj_out, n_atoms=u.atoms.n_atoms) as W:
for ts in u.trajectory:
W.write(u.atoms)
return trj_out
if __name__ == "__main__":
print(mda.__version__)
nstep = 5
FILESDIR = os.path.abspath(os.path.join(
os.path.dirname(__file__), os.path.pardir, 'files'))
TOP = os.path.join(FILESDIR, 'adk4AKE.psf')
DCD = os.path.join(FILESDIR, '1ake_007-nowater-core-dt240ps.dcd')
XTC = os.path.join(FILESDIR, '1ake_007-nowater-core-dt240ps.xtc')
convert_trajectory(TOP, DCD, XTC)
timer = StopWatch()
timer.tic('init')
u = mda.Universe(TOP, XTC)
timer.tic('load Universe')
mobile = u.select_atoms("protein")
residues = mobile.residues[1:-1]
timer.tic('select protein')
dihedral_groups = residues_to_dihedral_groups(residues)
timer.tic('generate dihedral groups')
DA = MDAnalysis.analysis.dihedrals.Dihedral(dihedral_groups, step=nstep)
timer.tic('setup MDAnalysis.analysis.dihedrals.Dihedral')
# 1 frame
#timer.tic('featurize_dihedrals() (1 step)')
nframes = len(u.trajectory[::nstep])
timer.tic("get trajectory length")
DA.run()
timer.tic('featurize_dihedrals() ({} frames)'.format(nframes))
# reformat as a feature vector of [cos(x) cos(x) ... sin(x) sin(x)...]
# per frame
x = np.deg2rad(DA.angles)
results = np.concatenate((np.cos(x), np.sin(x)), axis=1)
timer.tic('result --> (cos(x), sin(x))')
timer.show()
# per-frame (pulling out data from timer instance is ugly)
label = 'featurize_dihedrals() ({} frames)'.format(nframes)
labels = list(timer.keys())
ix = labels.index(label)
runtime = timer[label] - timer[labels[ix-1]]
runtime_per_frame = runtime / nframes
print("\n")
print("{0:40s} {1:g} s".format("runtime per frame",
runtime_per_frame))
#!/usr/bin/env python
import os
import warnings
import sys
import time
from collections import OrderedDict
from itertools import chain
import numpy as np
import MDAnalysis as mda
class StopWatch(OrderedDict):
fmt = "{0:40s} {1:10.4f} s"
def tic(self, label):
if label in self:
raise ValueError("label {} already exists".format(label))
self[label] = time.time()
def show(self):
print("--------------------------[ TIMING ]--------------------")
labels = list(self.keys())
start = labels[0]
for stop in labels[1:]:
dt = self[stop] - self[start]
print(self.fmt.format(stop, dt))
start = stop
print("".join(56*["-"]))
print(self.fmt.format("total time",
self[labels[-1]] - self[labels[0]]))
print("".join(56*["-"]))
def angle2sincos(x):
"""Convert angle x to (cos x, sin x).
Parameters
----------
x : float or array_like
Returns
-------
feature_vector : array
1D feature vector ``[cos(x[0]), sin(x[0]), cos(x[1]), sin(x[1]), ...]``.
"""
x = np.deg2rad(x)
return np.ravel(np.transpose([np.cos(x), np.sin(x)]))
def residues_to_dihedrals(residues):
"""Return list of [phi1, psi1, phi2, psi2, ...] dihedral objects"""
# flatten the list, see https://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python
return list(chain.from_iterable(
(res.phi_selection().dihedral, res.psi_selection().dihedral)
for res in residues))
def featurize_dihedrals(dihedrals):
angles = [dihedral.value() for dihedral in dihedrals]
return angle2sincos(angles)
def convert_trajectory(top, trj_in, trj_out, force=False):
if os.path.exists(trj_out) and not force:
warnings.warn("output trajectory {} exists, no conversion".format(trj_out))
return trj_out
u = mda.Universe(top, trj_in)
with mda.Writer(trj_out, n_atoms=u.atoms.n_atoms) as W:
for ts in u.trajectory:
W.write(u.atoms)
return trj_out
if __name__ == "__main__":
print(mda.__version__)
nstep = 5
FILESDIR = os.path.abspath(os.path.join(
os.path.dirname(__file__), os.path.pardir, 'files'))
TOP = os.path.join(FILESDIR, 'adk4AKE.psf')
DCD = os.path.join(FILESDIR, '1ake_007-nowater-core-dt240ps.dcd')
XTC = os.path.join(FILESDIR, '1ake_007-nowater-core-dt240ps.xtc')
convert_trajectory(TOP, DCD, XTC)
timer = StopWatch()
timer.tic('init')
u = mda.Universe(TOP, XTC)
timer.tic('load Universe')
mobile = u.select_atoms("protein")
residues = mobile.residues[1:-1]
timer.tic('select protein')
dihedrals = residues_to_dihedrals(residues)
timer.tic('generate dihedrals')
# 1 frame
dih_series = featurize_dihedrals(dihedrals)
timer.tic('featurize_dihedrals() (1 step)')
results = []
traj_iter = u.trajectory[::nstep]
nframes = len(traj_iter)
for ts in traj_iter:
results.append(featurize_dihedrals(dihedrals))
timer.tic('featurize_dihedrals() ({} frames)'.format(nframes))
# feature vector of [cos(x) sin(x) ... cos(x) sin(x)]
# per frame
results = np.array(results)
timer.tic('list --> array')
timer.show()
# per-frame (pulling out data from timer instance is ugly)
label = 'featurize_dihedrals() ({} frames)'.format(nframes)
labels = list(timer.keys())
ix = labels.index(label)
runtime = timer[label] - timer[labels[ix-1]]
runtime_per_frame = runtime / nframes
print("\n")
print("{0:40s} {1:g} s".format("runtime per frame",
runtime_per_frame))
@orbeckst
Copy link
Author

Note that the MDAnalysis.analysis.dihedrals.Dihedral functionality was added in PR MDAnalysis/mdanalysis#2033 and will be available in the upcoming release 0.19.0 of MDAnalysis.

At the moment (August 2018) you need to install the development branch of MDAnalysis to use it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment