Skip to content

Instantly share code, notes, and snippets.

@maxentile
Created August 4, 2016 14:30
Show Gist options
  • Save maxentile/7292054893903502e878a5b78233d849 to your computer and use it in GitHub Desktop.
Save maxentile/7292054893903502e878a5b78233d849 to your computer and use it in GitHub Desktop.
Toy example of shuffle-split cross-validation of a Markov state model, scored with the GMRQ (generalized matrix Rayleigh quotient)
from sklearn.cross_validation import ShuffleSplit
from msmbuilder.msm import MarkovStateModel
import numpy as np
# Synthetic stand-in data: every trajectory is a 1-D array of random integer
# state labels in [0, n_states). Swap this section out for real discrete
# trajectories.
n_states = 500
traj_length = 10000
n_trajs = 100
dtrajs = [
    np.random.randint(low=0, high=n_states, size=traj_length)
    for _ in range(n_trajs)
]
# m = number of propagator eigenfunctions the GMRQ score looks at
# --> open question: how should this be chosen?
m = 100
# Build n_iter random shuffle-split partitions of the trajectory indices, each
# with a 50:50 split between training and validation trajectories. The fixed
# random_state makes the partitions reproducible.
# NOTE(review): this is the legacy sklearn.cross_validation calling convention
# (n_iter keyword, iterating the object directly). Newer scikit-learn exposes
# sklearn.model_selection.ShuffleSplit with n_splits and .split() instead --
# confirm against the installed version before porting.
shuffle_split = ShuffleSplit(len(dtrajs), n_iter=100, test_size=0.5, random_state=0)

# Per-split results. The four lists are only ever appended to together, so
# entry k of each list always refers to the same successful split.
train_scores = []
val_scores = []
msms = []
i_s = []  # indices of the splits that were fitted and scored successfully

# Iterate over all splits.
for i, (train, validate) in enumerate(shuffle_split):
    # Collect the dtrajs belonging to the current train vs. validation split.
    # The comprehension index is deliberately NOT called `i`: on Python 2 the
    # comprehension variable leaks into the enclosing scope and would
    # overwrite the split counter recorded in `i_s` below.
    train_dtrajs = [dtrajs[j] for j in train]
    val_dtrajs = [dtrajs[j] for j in validate]

    try:
        # Parameterize an MSM on the training half, then score both halves.
        msm = MarkovStateModel(lag_time=10, n_timescales=m)
        msm.fit(train_dtrajs)
        train_score = msm.score_
        val_score = msm.score(val_dtrajs)
    except Exception as err:
        # Sometimes evaluating the validation score fails for unclear reasons.
        # Skip this split (best effort), but report which split failed and
        # why. Catching Exception rather than using a bare except lets
        # KeyboardInterrupt / SystemExit still stop the run.
        print('Something went wrong! (split %d: %r)' % (i, err))
        continue

    # Record results only once everything succeeded, so a failed split never
    # leaves a model in `msms` without matching scores.
    msms.append(msm)
    train_scores.append(train_score)
    val_scores.append(val_score)
    i_s.append(i)
# Summarize the scores across all splits: mean and standard deviation of the
# training scores, then of the validation scores.
train_mean, train_std = np.mean(train_scores), np.std(train_scores)
print('Train scores (mean, stdev):', train_mean, train_std)
val_mean, val_std = np.mean(val_scores), np.std(val_scores)
print('Validation scores (mean, stdev):', val_mean, val_std)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment