@theideasmith
Last active September 15, 2017 12:31
Examples for the Cluster Training Management System: two training scripts for an autoregressive sticky HDP-HMM. The first sweeps the nlags hyperparameter across a batch array job using an empirically estimated prior; the second trains a single model on a fixed configuration.
import sys
import os

# Make the project's src/ directory importable
projdir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(os.path.join(projdir, "src"))

import batch_train as btrain
import trainkit as tkit
from const import const as const
import numpy as np
from modeldb import traindb, modeldb
from models import Model, commit as modelcommit
import loaddata as load
import pyhsmm
import pyhsmm.basic.distributions as distributions
import autoregressive.distributions as d
import autoregressive.models as m
import pickle
from tinydb import where
MODELNAME = "3-PCA-EMPIRICAL-ESTIMATOR-ARWeakLimitStickyHDPHMM"
# Obtaining datasets: every .npy file in the data directory
alldata = [os.path.join(const.NPYFILES, f) for f in os.listdir(const.NPYFILES)]

# Models > 132 don't have a dataset, so models 132 and onwards need to be retrained.
def get_empirical_ar_params(train_datas, params):
    """
    Estimate the parameters of an AR observation model
    by fitting a single AR model to the entire dataset.
    """
    assert isinstance(train_datas, list) and len(train_datas) > 0
    datadimension = train_datas[0].shape[1]
    assert params["nu_0"] > datadimension + 1

    # Initialize the observation parameters
    obs_params = dict(nu_0=params["nu_0"],
                      S_0=params["S_0"],
                      M_0=params["M_0"],
                      K_0=params["K_0"],
                      affine=params["affine"])

    # Fit a single AR model to the entire dataset by maximum likelihood
    obs_distn = d.AutoRegression(**obs_params)
    obs_distn.max_likelihood(train_datas)

    # Center the inverse-Wishart prior on the fitted noise covariance.
    # Since E_{IW}[S] = S_0 / (nu_0 - datadimension - 1), setting
    # S_0 = sigma * (nu_0 - datadimension - 1) makes the prior mean equal
    # the empirical covariance; the fitted AR matrix becomes the prior
    # mean M_0 of the dynamics.
    obs_params["S_0"] = obs_distn.sigma * (params["nu_0"] - datadimension - 1)
    obs_params["M_0"] = obs_distn.A.copy()
    return obs_params
def genmodel_empirical_estimator(datasets, nlags):
    Nmax = 4        # truncation level for the weak-limit HDP
    affine = False
    D_obs = 3       # observation dimension (3 PCA components)
    prior_ar_params = dict(nu_0=D_obs + 2,
                           S_0=np.eye(D_obs),
                           M_0=np.hstack((np.eye(D_obs),
                                          np.zeros((D_obs, D_obs*(nlags-1) + affine)))),
                           K_0=np.eye(D_obs*nlags + affine),
                           affine=affine)
    # Replace the generic prior with one estimated from the data itself
    prior_ar_params = get_empirical_ar_params([dset.T for dset in datasets], prior_ar_params)
    model = m.ARWeakLimitStickyHDPHMM(
        alpha=4.,
        kappa=1e4,
        gamma=4.,
        init_state_distn='uniform',
        obs_distns=[
            d.AutoRegression(**prior_ar_params)
            for state in range(Nmax)],
    )
    return model
def init():
    # Sweep nlags over 1..99, one dataset and one array job per value
    nlags = list(range(1, 100))
    trainid = traindb.trainrun(
        os.path.abspath(__file__),
        desc=("After observing no obvious correlation between nlags and MSE performance "
              "we would like to assess whether a finer-resolution search of the nlags "
              "hyperparameter space yields correlative dynamics of performance versus nlags"),
        modelname=MODELNAME,
        datafiles=alldata[:len(nlags)],
        outputdir=const.TRAINFILES,
        inneroutputdirs=["segments", "models", "loglikelihoods", "plots"],
        configuration={
            "arrayjob": True,
            "nlags": nlags,
            "narray": len(nlags)
        }
    )
def run():
    # Look up this training run by the absolute path of the current file
    fname = os.path.abspath(__file__)
    trains = traindb.db.search(where("genfile") == fname)
    train = trains[0]
    trainid = train["trainid"]

    # Scheduler array-job indices are 1-based; convert to a 0-based index
    arraynumber = int(os.environ[const.ARRAYNUMBER]) - 1
    nlags = train["configuration"]["nlags"][arraynumber]
    nlags_runs = len(train["datafiles"])
    loaded_datasets, _ = load.loaddatas(alldata[:nlags_runs], n=3)

    # Generating a model object
    model = Model(genmodel_empirical_estimator(loaded_datasets, nlags))

    # Getting the location of the model trained by the ith batch job;
    # notice that we pass arraynumber: train_modelloc autogenerates
    # the path for each job i that is run.
    modelloc = tkit.train_modelloc(train, arraynumber=arraynumber)

    # Save the model object with pickle
    model.write_model(modelloc)

    # btrain will load the model up again.
    btrain.run(
        trainid,
        200,
        datasets=loaded_datasets)

tkit.handle_training(init, run)
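
Both scripts end with tkit.handle_training(init, run). The trainkit module is not included in this gist, so the sketch below is only a guess at the dispatch logic it presumably implements: choose between the init phase (register the run and write the untrained model) and the run phase (executed inside a batch job). The TRAINPHASE environment variable and the default are hypothetical stand-ins, not the actual trainkit API.

import os

def handle_training(init_fn, run_fn):
    # Hypothetical dispatch: the real trainkit may key off a CLI flag,
    # an environment variable, or an existing train record instead.
    phase = os.environ.get("TRAINPHASE", "init")
    if phase == "init":
        init_fn()   # register the run and write the untrained model
    elif phase == "run":
        run_fn()    # resume inside a (possibly array) batch job
    else:
        raise ValueError("unknown training phase: %r" % phase)

The second example below is the simpler of the two: it trains one model on a fixed configuration instead of sweeping nlags across an array job.
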
import sys
import os

projdir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(os.path.join(projdir, "src"))

import batch_train as btrain
import trainkit as tkit
from const import const as const
import numpy as np
from modeldb import TrainDB, ModelDB, traindb, modeldb
from tinydb import where
from models import Model, commit as modelcommit
import pyhsmm
import pyhsmm.basic.distributions as distributions
import autoregressive.distributions as d
import autoregressive.models as m
import pickle
def init():
    Nmax = 10       # truncation level for the weak-limit HDP
    affine = False
    nlags = 3       # autoregressive order
    D_obs = 3       # observation dimension (3 PCA components)
    prior_ar_params = \
        dict(nu_0=D_obs + 2,
             S_0=np.eye(D_obs),
             M_0=np.hstack((np.eye(D_obs),
                            np.zeros((D_obs, D_obs*(nlags-1) + affine)))),
             K_0=np.eye(D_obs*nlags + affine),
             affine=affine)
    model = Model(
        m.ARWeakLimitStickyHDPHMM(
            alpha=4.,
            kappa=10**4,
            gamma=4,
            init_state_distn='uniform',
            obs_distns=[
                d.AutoRegression(**prior_ar_params)
                for state in range(Nmax)],
        ),
        pcas=D_obs)
    alldata = [os.path.join(const.NPYFILES, f) for f in os.listdir(const.NPYFILES)]
    datasets = alldata[:2]   # train on the first two datasets only

    modeldb = ModelDB(location=const.MODELDB)
    traindb = TrainDB(modeldb, location=const.TRAINDB)

    # Logging the training run
    trainid = traindb.trainrun(
        os.path.abspath(__file__),
        desc="Training a simple model",
        modelname="3-PCA-ARWeakLimitStickyHDPHMM",
        datafiles=datasets,
        inneroutputdirs=["loglikelihoods"],
        outputdir=const.TRAINFILES
    )
    train = traindb.getTrain(trainid)

    # Writing the untrained model to its training location
    model.write_model(
        tkit.train_modelloc(train))
def run():
    # Look up this training run by the absolute path of the current file
    fname = os.path.abspath(__file__)
    trains = traindb.db.search(where("genfile") == fname)
    train = trains[0]
    trainid = train["trainid"]
    btrain.run(trainid, 1000)

tkit.handle_training(init, run)
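
For reference, here is a self-contained illustration of the empirical-prior trick in get_empirical_ar_params, using only NumPy: an ordinary least-squares AR(1) fit stands in for d.AutoRegression.max_likelihood, so it runs without the cluster codebase or pyhsmm. The data and dimensions are purely illustrative.

import numpy as np

# Toy data: a 3-dimensional random walk, T steps long
D, T = 3, 1000
nu_0 = D + 2                                   # same prior setting as above
data = np.cumsum(np.random.randn(T, D), axis=0)

# Least-squares AR(1) fit: predict x_t from x_{t-1}
X, Y = data[:-1], data[1:]
A = np.linalg.lstsq(X, Y, rcond=None)[0].T     # fitted dynamics matrix
resid = Y - X.dot(A.T)
sigma = np.cov(resid.T)                        # empirical noise covariance

# Center the inverse-Wishart prior on sigma:
# E_IW[S] = S_0 / (nu_0 - D - 1)  =>  S_0 = sigma * (nu_0 - D - 1)
S_0 = sigma * (nu_0 - D - 1)
M_0 = A.copy()                                 # prior mean of the dynamics
print("prior mean of S:\n", S_0 / (nu_0 - D - 1))

The AutoRegression fit used in the scripts additionally handles higher lags and the optional affine term, but the covariance-scaling step mirrors the one above.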