Last active
August 27, 2018 16:59
-
-
Save orbeckst/26081375f3ea3152f08bbcc90c14c5eb to your computer and use it in GitHub Desktop.
benchmarking dihedral featurization in MDAnalysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import warnings | |
import sys | |
import time | |
from collections import OrderedDict | |
from itertools import chain | |
import numpy as np | |
import MDAnalysis as mda | |
import MDAnalysis.analysis.dihedrals | |
class StopWatch(OrderedDict): | |
fmt = "{0:40s} {1:10.4f} s" | |
def tic(self, label): | |
if label in self: | |
raise ValueError("label {} already exists".format(label)) | |
self[label] = time.time() | |
def show(self): | |
print("--------------------------[ TIMING ]--------------------") | |
labels = list(self.keys()) | |
start = labels[0] | |
for stop in labels[1:]: | |
dt = self[stop] - self[start] | |
print(self.fmt.format(stop, dt)) | |
start = stop | |
print("".join(56*["-"])) | |
print(self.fmt.format("total time", | |
self[labels[-1]] - self[labels[0]])) | |
print("".join(56*["-"])) | |
def residues_to_dihedral_groups(residues): | |
"""Return list of [phi1, psi1, phi2, psi2, ...] atom groups""" | |
# flatten the list, see https://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python | |
return list(chain.from_iterable( | |
(res.phi_selection(), res.psi_selection()) | |
for res in residues)) | |
def convert_trajectory(top, trj_in, trj_out, force=False): | |
if os.path.exists(trj_out) and not force: | |
warnings.warn("output trajectory {} exists, no conversion".format(trj_out)) | |
return trj_out | |
u = mda.Universe(top, trj_in) | |
with mda.Writer(trj_out, n_atoms=u.atoms.n_atoms) as W: | |
for ts in u.trajectory: | |
W.write(u.atoms) | |
return trj_out | |
if __name__ == "__main__": | |
print(mda.__version__) | |
nstep = 5 | |
FILESDIR = os.path.abspath(os.path.join( | |
os.path.dirname(__file__), os.path.pardir, 'files')) | |
TOP = os.path.join(FILESDIR, 'adk4AKE.psf') | |
DCD = os.path.join(FILESDIR, '1ake_007-nowater-core-dt240ps.dcd') | |
XTC = os.path.join(FILESDIR, '1ake_007-nowater-core-dt240ps.xtc') | |
convert_trajectory(TOP, DCD, XTC) | |
timer = StopWatch() | |
timer.tic('init') | |
u = mda.Universe(TOP, XTC) | |
timer.tic('load Universe') | |
mobile = u.select_atoms("protein") | |
residues = mobile.residues[1:-1] | |
timer.tic('select protein') | |
dihedral_groups = residues_to_dihedral_groups(residues) | |
timer.tic('generate dihedral groups') | |
DA = MDAnalysis.analysis.dihedrals.Dihedral(dihedral_groups, step=nstep) | |
timer.tic('setup MDAnalysis.analysis.dihedrals.Dihedral') | |
# 1 frame | |
#timer.tic('featurize_dihedrals() (1 step)') | |
nframes = len(u.trajectory[::nstep]) | |
timer.tic("get trajectory length") | |
DA.run() | |
timer.tic('featurize_dihedrals() ({} frames)'.format(nframes)) | |
# reformat as a feature vector of [cos(x) cos(x) ... sin(x) sin(x)...] | |
# per frame | |
x = np.deg2rad(DA.angles) | |
results = np.concatenate((np.cos(x), np.sin(x)), axis=1) | |
timer.tic('result --> (cos(x), sin(x))') | |
timer.show() | |
# per-frame (pulling out data from timer instance is ugly) | |
label = 'featurize_dihedrals() ({} frames)'.format(nframes) | |
labels = list(timer.keys()) | |
ix = labels.index(label) | |
runtime = timer[label] - timer[labels[ix-1]] | |
runtime_per_frame = runtime / nframes | |
print("\n") | |
print("{0:40s} {1:g} s".format("runtime per frame", | |
runtime_per_frame)) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import warnings | |
import sys | |
import time | |
from collections import OrderedDict | |
from itertools import chain | |
import numpy as np | |
import MDAnalysis as mda | |
class StopWatch(OrderedDict): | |
fmt = "{0:40s} {1:10.4f} s" | |
def tic(self, label): | |
if label in self: | |
raise ValueError("label {} already exists".format(label)) | |
self[label] = time.time() | |
def show(self): | |
print("--------------------------[ TIMING ]--------------------") | |
labels = list(self.keys()) | |
start = labels[0] | |
for stop in labels[1:]: | |
dt = self[stop] - self[start] | |
print(self.fmt.format(stop, dt)) | |
start = stop | |
print("".join(56*["-"])) | |
print(self.fmt.format("total time", | |
self[labels[-1]] - self[labels[0]])) | |
print("".join(56*["-"])) | |
def angle2sincos(x): | |
"""Convert angle x to (cos x, sin x). | |
Parameters | |
---------- | |
x : float or array_like | |
Returns | |
------- | |
feature_vector : array | |
1D feature vector ``[cos(x[0]), sin(x[0]), cos(x[1]), sin(x[1]), ...]``. | |
""" | |
x = np.deg2rad(x) | |
return np.ravel(np.transpose([np.cos(x), np.sin(x)])) | |
def residues_to_dihedrals(residues): | |
"""Return list of [phi1, psi1, phi2, psi2, ...] dihedral objects""" | |
# flatten the list, see https://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python | |
return list(chain.from_iterable( | |
(res.phi_selection().dihedral, res.psi_selection().dihedral) | |
for res in residues)) | |
def featurize_dihedrals(dihedrals): | |
angles = [dihedral.value() for dihedral in dihedrals] | |
return angle2sincos(angles) | |
def convert_trajectory(top, trj_in, trj_out, force=False): | |
if os.path.exists(trj_out) and not force: | |
warnings.warn("output trajectory {} exists, no conversion".format(trj_out)) | |
return trj_out | |
u = mda.Universe(top, trj_in) | |
with mda.Writer(trj_out, n_atoms=u.atoms.n_atoms) as W: | |
for ts in u.trajectory: | |
W.write(u.atoms) | |
return trj_out | |
if __name__ == "__main__": | |
print(mda.__version__) | |
nstep = 5 | |
FILESDIR = os.path.abspath(os.path.join( | |
os.path.dirname(__file__), os.path.pardir, 'files')) | |
TOP = os.path.join(FILESDIR, 'adk4AKE.psf') | |
DCD = os.path.join(FILESDIR, '1ake_007-nowater-core-dt240ps.dcd') | |
XTC = os.path.join(FILESDIR, '1ake_007-nowater-core-dt240ps.xtc') | |
convert_trajectory(TOP, DCD, XTC) | |
timer = StopWatch() | |
timer.tic('init') | |
u = mda.Universe(TOP, XTC) | |
timer.tic('load Universe') | |
mobile = u.select_atoms("protein") | |
residues = mobile.residues[1:-1] | |
timer.tic('select protein') | |
dihedrals = residues_to_dihedrals(residues) | |
timer.tic('generate dihedrals') | |
# 1 frame | |
dih_series = featurize_dihedrals(dihedrals) | |
timer.tic('featurize_dihedrals() (1 step)') | |
results = [] | |
traj_iter = u.trajectory[::nstep] | |
nframes = len(traj_iter) | |
for ts in traj_iter: | |
results.append(featurize_dihedrals(dihedrals)) | |
timer.tic('featurize_dihedrals() ({} frames)'.format(nframes)) | |
# feature vector of [cos(x) sin(x) ... cos(x) sin(x)] | |
# per frame | |
results = np.array(results) | |
timer.tic('list --> array') | |
timer.show() | |
# per-frame (pulling out data from timer instance is ugly) | |
label = 'featurize_dihedrals() ({} frames)'.format(nframes) | |
labels = list(timer.keys()) | |
ix = labels.index(label) | |
runtime = timer[label] - timer[labels[ix-1]] | |
runtime_per_frame = runtime / nframes | |
print("\n") | |
print("{0:40s} {1:g} s".format("runtime per frame", | |
runtime_per_frame)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note that the
MDAnalysis.analysis.dihedrals.Dihedral
functionality was added in PR MDAnalysis/mdanalysis#2033 and will be available in the upcoming release 0.19.0 of MDAnalysis.At the moment (August 2018) you need to install the development branch of MDAnalysis to use it.