Last active
September 15, 2017 12:31
-
-
Save theideasmith/830314cad90c8d1d6c254eaf278083ca to your computer and use it in GitHub Desktop.
Examples for Cluster Training Management System
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
projdir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
sys.path.append(os.path.join(projdir, "src")) | |
import batch_train as btrain | |
import trainkit as tkit | |
from const import const as const | |
import numpy as np | |
from modeldb import traindb, modeldb | |
import trainkit as tkit | |
from models import Model, commit as modelcommit | |
import loaddata as load | |
import pyhsmm | |
import pyhsmm.basic.distributions as distributions | |
import autoregressive.distributions as d | |
import autoregressive.models as m | |
import pickle | |
from tinydb import where | |
MODELNAME = "3-PCA-EMPIRICAL-ESTIMATOR-ARWeakLimitStickyHDPHMM" | |
# Obtaining Datasets.
# Use a list comprehension rather than map() so the result is subscriptable:
# under Python 3, map() returns a lazy iterator and the later
# alldata[:len(nlags)] / alldata[:nlags_runs] slices would raise TypeError.
# (Python 2's map() returned a list, so this is backward compatible.)
alldata = [os.path.join(const.NPYFILES, f) for f in os.listdir(const.NPYFILES)]
# Models > 132 dont have a dataset. So I need to retrain models 132 and onwards | |
def get_empirical_ar_params(train_datas, params):
    """
    Estimate the parameters of an AR observation model
    by fitting a single AR model to the entire dataset.

    Parameters
    ----------
    train_datas : list of ndarray
        Non-empty list of data arrays; each is (T, D) with a shared
        observation dimension D (taken from the first array).
    params : dict
        Prior hyperparameters with keys nu_0, S_0, M_0, K_0, affine.

    Returns
    -------
    dict
        The hyperparameters with S_0 and M_0 replaced by empirical
        estimates from a single maximum-likelihood AR fit.

    Raises
    ------
    ValueError
        If train_datas is not a non-empty list, or nu_0 is too small
        for the inverse-Wishart mean to exist.
    """
    # Validate with explicit raises instead of assert, which is silently
    # stripped when Python runs with -O.
    if not isinstance(train_datas, list) or not train_datas:
        raise ValueError("train_datas must be a non-empty list of arrays")
    datadimension = train_datas[0].shape[1]
    if params["nu_0"] <= datadimension + 1:
        raise ValueError(
            "nu_0 must exceed data dimension + 1 so E_IW[S] is finite")

    # Initialize the observation parameters from the supplied prior.
    obs_params = dict(nu_0=params["nu_0"],
                      S_0=params['S_0'],
                      M_0=params['M_0'],
                      K_0=params['K_0'],
                      affine=params['affine'])

    # Fit a single AR model to the entire dataset by maximum likelihood.
    obs_distn = d.AutoRegression(**obs_params)
    obs_distn.max_likelihood(train_datas)

    # Use the inferred noise covariance as the prior mean:
    # E_{IW}[S] = S_0 / (nu_0 - datadimension - 1)
    obs_params["S_0"] = obs_distn.sigma * (params["nu_0"] - datadimension - 1)
    obs_params["M_0"] = obs_distn.A.copy()
    return obs_params
def genmodel_empirical_estimator(datasets, nlags):
    """
    Build an ARWeakLimitStickyHDPHMM whose AR observation priors are
    empirically estimated from `datasets`.

    Each dataset is transposed to (T, D) before fitting; `nlags` is the
    number of autoregressive lags.
    """
    n_states_max = 4    # weak-limit truncation level
    use_affine = False  # no affine (bias) term in the AR dynamics
    obs_dim = 3         # observation dimensionality (3 PCA components)

    # Vague default prior; nu_0 = D + 2 is the smallest value for which
    # the inverse-Wishart mean exists.
    base_prior = dict(
        nu_0=obs_dim + 2,
        S_0=np.eye(obs_dim),
        M_0=np.hstack((np.eye(obs_dim),
                       np.zeros((obs_dim, obs_dim * (nlags - 1) + use_affine)))),
        K_0=np.eye(obs_dim * nlags + use_affine),
        affine=use_affine)

    # Replace S_0 / M_0 with empirical estimates from a single AR fit.
    ar_prior = get_empirical_ar_params([dset.T for dset in datasets], base_prior)

    observations = [d.AutoRegression(**ar_prior) for _ in range(n_states_max)]
    return m.ARWeakLimitStickyHDPHMM(
        alpha=4.,
        kappa=1e4,
        gamma=4.,
        init_state_distn='uniform',
        obs_distns=observations,
    )
def init():
    """
    Register a new training run in the train database: one array job per
    value of nlags in 1..99.
    """
    lag_values = range(1, 100)
    traindb.trainrun(
        os.path.abspath(__file__),
        desc=("After observing no obvious correlation between nlags and MSE performance "
              "we would like to assess whether a finer resolution search of nlags hyper-"
              "parameterization space yields correlative dynamics of performance versus nlags"),
        modelname=MODELNAME,
        datafiles=alldata[:len(lag_values)],
        outputdir=const.TRAINFILES,
        inneroutputdirs=["segments", "models", "loglikelihoods", "plots"],
        configuration={
            "arrayjob": True,
            "nlags": lag_values,
            "narray": len(lag_values),
        },
    )
def run():
    """
    Execute one array job: build the model for this task's nlags value,
    persist it, and hand off to btrain for 200 training iterations.
    """
    # Look up the training run that init() registered under this file path.
    this_file = os.path.abspath(__file__)
    train = traindb.db.search(where("genfile") == this_file)[0]
    trainid = train["trainid"]

    # Array task ids in the environment are 1-based; convert to 0-based.
    arraynumber = int(os.environ[const.ARRAYNUMBER]) - 1
    nlags = train["configuration"]["nlags"][arraynumber]

    # Load as many datasets as were registered for this run (n=3 per loaddatas).
    nlags_runs = len(train["datafiles"].keys())
    loaded_datasets, _ = load.loaddatas(alldata[:nlags_runs], n=3)

    # Generating a model object for this task's lag count.
    model = Model(genmodel_empirical_estimator(loaded_datasets, nlags))

    # Getting location of the model trained by the ith batch job; note that
    # we pass arraynumber — train_modelloc autogenerates the path for each
    # job i that is run.
    modelloc = tkit.train_modelloc(train, arraynumber=arraynumber)

    # Save the model object with pickle; btrain will load the model up again.
    model.write_model(modelloc)
    btrain.run(trainid, 200, datasets=loaded_datasets)
tkit.handle_training(init, run) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
projdir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
sys.path.append(os.path.join(projdir, "src")) | |
import batch_train as btrain | |
import trainkit as tkit | |
from const import const as const | |
import numpy as np | |
from modeldb import TrainDB, ModelDB, traindb, modeldb | |
from tinydb import where | |
import trainkit as tkit | |
from models import Model, commit as modelcommit | |
import pyhsmm | |
import pyhsmm.basic.distributions as distributions | |
import autoregressive.distributions as d | |
import autoregressive.models as m | |
import pickle | |
def init():
    """
    Build a 3-PCA ARWeakLimitStickyHDPHMM, register a training run for it
    in the databases, and write the untrained model to the run's model
    location.
    """
    Nmax = 10       # weak-limit truncation on the number of states
    affine = False  # no affine (bias) term in the AR dynamics
    nlags = 3       # autoregressive lags
    D_obs = 3       # observation dimensionality (3 PCA components)

    # Vague AR prior; nu_0 = D_obs + 2 is the smallest value for which the
    # inverse-Wishart mean exists.
    prior_ar_params = \
        dict(nu_0=D_obs+2,
             S_0=np.eye(D_obs),
             M_0=np.hstack((np.eye(D_obs), np.zeros((D_obs, D_obs*(nlags-1)+affine)))),
             K_0=np.eye(D_obs*nlags+affine),
             affine=affine)

    model = Model(
        m.ARWeakLimitStickyHDPHMM(
            alpha=4.,
            kappa=10**4,
            gamma=4,
            init_state_distn='uniform',
            obs_distns=[
                d.AutoRegression(**prior_ar_params)
                for state in range(Nmax)],
        ),
        pcas=D_obs)

    # Obtaining datasets. Use a list comprehension rather than map() so the
    # result is subscriptable — under Python 3, map() returns a lazy iterator
    # and the alldata[0:2] slice below would raise TypeError.
    alldata = [os.path.join(const.NPYFILES, f)
               for f in os.listdir(const.NPYFILES)]
    datasets = alldata[0:2]

    modeldb = ModelDB(location=const.MODELDB)
    traindb = TrainDB(modeldb, location=const.TRAINDB)

    # Logging training run.
    # NOTE(review): "logliklihoods" is misspelled but deliberately kept — it
    # is a runtime directory name that other code may depend on.
    trainid = traindb.trainrun(
        os.path.abspath(__file__),
        desc="Training a simple model",
        modelname="3-PCA-ARWeakLimitStickyHDPHMM",
        datafiles=datasets,
        inneroutputdirs=["logliklihoods"],
        outputdir=const.TRAINFILES
    )
    train = traindb.getTrain(trainid)

    # Writing the (untrained) model to the run's model location.
    model.write_model(
        tkit.train_modelloc(train))
def run():
    """
    Resume the training run registered for this file and train for 1000
    iterations via btrain.
    """
    # The run was registered under this file's absolute path by init().
    this_file = os.path.abspath(__file__)
    matches = traindb.db.search(where("genfile") == this_file)
    trainid = matches[0]["trainid"]
    btrain.run(trainid, 1000)
tkit.handle_training(init, run) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment