Skip to content

Instantly share code, notes, and snippets.

@mpharrigan
Last active August 29, 2015 14:19
Show Gist options
  • Save mpharrigan/c2b68d8d015133f1e387 to your computer and use it in GitHub Desktop.
Save mpharrigan/c2b68d8d015133f1e387 to your computer and use it in GitHub Desktop.
Potential msmbuilder workflow
# Basic usage / workflow
# NOTE(review): this is a sketch of a proposed workflow; `dataset`, `dump`,
# `load` and the estimator classes are not defined or imported in this snippet.
ds = dataset("**/*.dcd")
# Each stage derives a new named dataset from the previous one, then adds that
# stage's `fit_transform` output to it with `+=`.
dihed = ds.derive('dihed', fmt='dir-npy')
dihed += DihedralFeaturizer().fit_transform(ds)
tica = dihed.derive('tica', fmt='hdf5')
tica += tICA().fit_transform(dihed)
clusters = tica.derive('clusters', fmt='hdf5')
clusters += KMeans().fit_transform(tica)
# Fit an MSM on the cluster dataset and hand it to `dump` with a file name
# (presumably serializes the model to that path -- TODO confirm).
msm = MSM().fit(clusters)
dump(msm, 'msm.pickl')
# Load the model back, reopen the dataset, and draw indices from the model;
# `inds` is later passed to structures_from_indices.
msm = load('msm.pickl')
ds = dataset("**/*.dcd")
inds = msm.sample_from_states()
def structures_from_indices(ds, inds):
    """Load the frames named by ``inds`` and join them into one trajectory.

    Maybe implement this as a convenience function in MSMBuilder.

    Parameters
    ----------
    ds : dataset
        Object whose ``meta`` table is indexed with ``.loc[i]`` and provides
        the ``'traj_fn'`` and ``'top_fn'`` entries for each trajectory.
    inds : iterable of (i, frame) pairs
        ``i`` selects a row of ``ds.meta``; ``frame`` is the frame to load
        from that row's trajectory file.

    Returns
    -------
    traj
        The loaded frames joined in the order given by ``inds``. When only
        one frame is requested, that single-frame trajectory is returned.

    Raises
    ------
    ValueError
        If ``inds`` yields no pairs, since there is nothing to join.
    """
    trajs = []
    for i, frame in inds:
        # Look the metadata row up once instead of once per column.
        row = ds.meta.loc[i]
        # mdtraj.load_frame takes the frame index as its second positional
        # argument; it has no `frame` keyword.
        trajs.append(
            mdtraj.load_frame(row['traj_fn'], frame, top=row['top_fn'])
        )
    if not trajs:
        raise ValueError("inds is empty; no structures to load")
    first, rest = trajs[0], trajs[1:]
    return first.join(rest) if rest else first
# Collect the sampled frames into one trajectory and save it via its `save`
# method.
traj = structures_from_indices(ds, inds)
traj.save("structures.pdb")
# Support adding data to a dataset
ds = dataset("3rvy/*.dcd", top="3rvy.prmtop")
ds += dataset("4lto/*.dcd", top="4lto.prmtop")
# Print the distinct values of the 'top_fn' metadata column (presumably both
# topology files after the `+=` above -- TODO confirm).
print(ds.meta['top_fn'].unique())
# Transform some new data
# NOTE(review): 'tica.pkl' is not written anywhere in this snippet; presumably
# a fitted tICA model saved elsewhere -- confirm.
tica = load('tica.pkl')
tica_trajs = dataset('tica_trajs')
tica_trajs += tica.transform(ds)
# support adding custom metadata upon opening a dataset
def get_meta(fn):
    """Build the custom metadata dict for one trajectory file.

    Parameters
    ----------
    fn : str
        Trajectory path that starts with ``RUN<digits>/CLONE<digits>``.

    Returns
    -------
    dict
        ``'run'`` and ``'clone'`` as the digit strings captured from the
        path, and ``'n_frames'`` as the length reported by the opened
        trajectory file.

    Raises
    ------
    ValueError
        If ``fn`` does not start with the ``RUN<n>/CLONE<n>`` pattern.
    """
    ma = re.match(r"RUN([0-9]+)/CLONE([0-9]+)", fn)
    if ma is None:
        # Fail before touching the file, with a message naming the bad path,
        # instead of an AttributeError on None further down.
        raise ValueError(
            "cannot parse run/clone from filename: {!r}".format(fn)
        )
    # Use the file object as a context manager so the handle is closed.
    with mdtraj.open(fn) as traj_file:
        n_frames = len(traj_file)
    return {'run': ma.group(1), 'clone': ma.group(2), 'n_frames': n_frames}
# Pass get_meta as the `meta` argument when opening the dataset (presumably it
# is called once per matched file -- TODO confirm), then report how many
# distinct 'run' values the resulting metadata holds.
ds = dataset("RUN*/CLONE*/concat.xtc", meta=get_meta)
print("Using data from", len(ds.meta['run'].unique()), "runs")
@msultan
Copy link

msultan commented Apr 14, 2015

  I like the workflow, though I think the learning curve for newcomers might be too high.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment