cvigoe/model.py

## model.py
#######################################################################################################################
# Simulation of Bayesian Dwell Time Model Assuming Error-free Covariates
# Conor Igoe - 2017
#
# Loads relevant data from the test directory given by test_data_filepath
# Initialises the necessary data structures for PyMC3 and logging
# Steps through the loaded test data chronologically
# 	For the 1st data point, with no posteriors to use as priors for the model, uses uniform priors
# 	For all other data points, uses posteriors from the previous datapoint as priors for current point in simulation
# 	Calculates the likelihood
# 	Samples from the posteriors using MCMC
# 	Logs and plots results
# 	Samples from the Posterior Predictive Distribution and uses medians of MCMC samples to inform belief of dwell times
# 	Predicts a dwell time by taing mean of the medians
# 	Logs error between prediction and ground truth at current point in simulation
# Plots the result of simulation: comparison of truth with prediction, residuals, dist. of errors, hyperparam values
#######################################################################################################################


import sys
sys.path.insert(0, '../../data_management')
sys.path.insert(0, '../../analysis')
sys.path.insert(0, '../')
from modelling_utils import *
from utils import *
from datetime import datetime
import os

dt_string = datetime.now().strftime("_%Y_%m_%d_%H_%M_%S")

pp = pprint.PrettyPrinter(width=4,indent=4)

# Load Data
test_data_filepath = '../../data/TEST.csv'
df = read_csvs([test_data_filepath])
df = df[df['DWTIME'] > 0]
df = df.sort_values('ARR').reset_index(drop=True)


# test_data_filepath = []
# for root,dirs,files in os.walk('../../data/long_test'):
# 	for file in files:
# 		if file.endswith(".csv"):
# 			test_data_filepath.append('../../data/long_test/' + file)

# df = read_csvs(test_data_filepath)
# df = df[df['DWTIME'] > 0]
# df = df.sort_values('ARR').reset_index(drop=True)
# print('Length of dataframe: ', len(df))


prediction_plots_dir = '../../assets/centre_ave_aiken'
log_dir = '../logs/'
log_suffix = 'centre_ave_aiken.json'
note = '(centre_ave_aiken)'
# Upper Limit for plotting
CAP = 60

# Flag to allow for cold starting
flag = True
save = True

# Logging Data Structure
log = [{'Arrival Time': 0, 'Observed Dwell': 0, 'Predicted Dwell': 0, 'Observed ON': 0, 'Estimated ON': 0, 'Observed OFF': 0, 'Estimated OFF': 0,
		'beta_1_ON mean': 0, 'beta_1_ON std': 0,'beta_1_OFF mean': 0, 'beta_1_OFF std': 0,
		'beta_2_ON mean': 0, 'beta_2_ON std': 0,'beta_2_OFF mean': 0, 'beta_2_OFF std': 0,
		'bus_route': 0}]

log_keys = ['Arrival Time', 'Observed Dwell', 'Predicted Dwell', 'Observed ON', 'Estimated ON', 'Observed OFF', 'Estimated OFF',
			'beta_1_ON mean', 'beta_1_ON std', 'beta_1_OFF mean', 'beta_1_OFF std',
			'beta_2_ON mean', 'beta_2_ON std', 'beta_2_OFF mean', 'beta_2_OFF std',
			'bus_route']

predictions = []
observations = []
differences = []

hyper_param_1_ON_means = []
hyper_param_1_OFF_means = []
hyper_param_2_ON_means = []
hyper_param_2_OFF_means = []
hyper_param_1_ON_stds = []
hyper_param_1_OFF_stds = []
hyper_param_2_ON_stds = []
hyper_param_2_OFF_stds = []


#######################################################################################################################
# Simulation
#######################################################################################################################

for index, row in df.iterrows():
	log_entry = {}
	print(index)

	# ON, OFF, DWTIME, LOAD = float(row['ON']), float(row['OFF']), float(row['DWTIME']), float(row['LOAD'])
	ON, OFF, DWTIME = float(row['ON']), float(row['OFF']), float(row['DWTIME'])

	log[-1]['Arrival Time'] = str(row['ARR'])
	log[-1]['Observed Dwell'] = DWTIME
	log[-1]['Observed ON'] = ON
	log[-1]['Observed OFF'] = OFF
	# log[-1]['Observed LOAD'] = LOAD

	# ON_shared, OFF_shared, LOAD_shared = shared(ON), shared(OFF), shared(LOAD)
	ON_shared, OFF_shared = shared(ON), shared(OFF)
	bus_route = str(row['ROUTE'])
	log[-1]['bus_route'] = bus_route

	with pm.Model() as basic_model:
		if flag: # no posterior yet - Use Normal as priors
			beta_1_ON = pm.HalfNormal('beta_1_ON_{0}'.format(index), sd=1)
			beta_1_OFF = pm.HalfNormal('beta_1_OFF_{0}'.format(index),sd=1)
			beta_2_ON = pm.HalfNormal('beta_2_ON_{0}'.format(index), sd=1)
			beta_2_OFF = pm.HalfNormal('beta_2_OFF_{0}'.format(index), sd=1)

			hyper_param_1_ON = pm.HalfNormal('hyper_param_1_ON_{0}'.format(index), sd=1)
			hyper_param_1_OFF = pm.HalfNormal('hyper_param_1_OFF_{0}'.format(index), sd=1)
			hyper_param_2_ON = pm.HalfNormal('hyper_param_2_ON_{0}'.format(index), sd=3)
			hyper_param_2_OFF = pm.HalfNormal('hyper_param_2_OFF_{0}'.format(index), sd=3)

			flag = False
		else:
			burnin = int(len(trace) / 5)

			beta_1_ON = from_posterior('beta_1_ON_{0}'.format(index), trace['beta_1_ON_{0}'.format(index - 1)][burnin:])
			beta_1_OFF = from_posterior('beta_1_OFF_{0}'.format(index), trace['beta_1_OFF_{0}'.format(index - 1)][burnin:])
			beta_2_ON = from_posterior('beta_2_ON_{0}'.format(index), trace['beta_2_ON_{0}'.format(index - 1)][burnin:])
			beta_2_OFF = from_posterior('beta_2_OFF_{0}'.format(index), trace['beta_2_OFF_{0}'.format(index - 1)][burnin:])

			hyper_param_1_ON = from_posterior('hyper_param_1_ON_{0}'.format(index), trace['hyper_param_1_ON_{0}'.format(index - 1)][burnin:])
			hyper_param_1_OFF = from_posterior('hyper_param_1_OFF_{0}'.format(index), trace['hyper_param_1_OFF_{0}'.format(index - 1)][burnin:])
			hyper_param_2_ON = from_posterior('hyper_param_2_ON_{0}'.format(index), trace['hyper_param_2_ON_{0}'.format(index - 1)][burnin:])
			hyper_param_2_OFF = from_posterior('hyper_param_2_OFF_{0}'.format(index), trace['hyper_param_2_OFF_{0}'.format(index - 1)][burnin:])

		# Likelihood
		mu = np.log(beta_1_ON*np.power(ON_shared, hyper_param_1_ON) + beta_1_OFF*np.power(OFF_shared, hyper_param_1_OFF))
		sd = np.log(beta_2_ON*np.power(ON_shared, hyper_param_2_ON) + beta_2_OFF*np.power(OFF_shared, hyper_param_2_OFF))

		DWELLS_obs = pm.Lognormal('DWELLS_obs_{0}'.format(index), mu=mu, sd=sd, observed=DWTIME)

		# Sample using MCMC
		step = pm.Metropolis([beta_1_ON, beta_1_OFF, beta_2_ON, beta_2_OFF, hyper_param_1_ON, hyper_param_1_OFF, hyper_param_2_ON, hyper_param_2_OFF])
		trace = pm.sample(1000, step=step)

		log_entry['beta_1_ON mean'] = np.mean(trace['beta_1_ON_{0}'.format(index)])
		log_entry['beta_1_ON std'] = np.std(trace['beta_1_ON_{0}'.format(index)])
		log_entry['beta_1_OFF mean'] = np.mean(trace['beta_1_OFF_{0}'.format(index)])
		log_entry['beta_1_OFF std'] = np.std(trace['beta_1_OFF_{0}'.format(index)])
		log_entry['beta_2_ON mean'] = np.mean(trace['beta_2_ON_{0}'.format(index)])
		log_entry['beta_2_ON std'] = np.std(trace['beta_2_ON_{0}'.format(index)])
		log_entry['beta_2_OFF mean'] = np.mean(trace['beta_2_OFF_{0}'.format(index)])
		log_entry['beta_2_OFF std'] = np.std(trace['beta_2_OFF_{0}'.format(index)])

		# Plot posterior distributions for coefficients
		_ = pm.traceplot(trace, figsize=(10,7))
		filename = row['ARR'] + '_posteriors'
		if save:
			plt.savefig(prediction_plots_dir + '/' + filename + '.svg')
			plt.savefig(prediction_plots_dir + '/' + filename + '.png')
		plt.show(block=False)
		plt.pause(0.5)
		plt.close()

		# Make prediction for next dwell time
		if index == list(df.index)[-1]:
			estimated_ON = float(1)
			estimated_OFF = float(1)
			observed_ON = estimated_ON
			observed_OFF = estimated_OFF
		else:
			df1 = df.iloc[[index + 1]].values[0]
			estimated_ON = float(df1[10])
			estimated_OFF = float(df1[11])
			observed_ON = estimated_ON
			observed_OFF = estimated_OFF

		ON_shared.set_value(estimated_ON)
		OFF_shared.set_value(estimated_OFF)

		log_entry['Estimated ON'] = float(estimated_ON)
		log_entry['Estimated OFF'] = float(estimated_OFF)

		# Sample again to find Posterior Predictive Distributino
		ppc = pm.sample_ppc(trace, samples=500)
		posterior_dists = ppc['DWELLS_obs_{0}'.format(index)]

		# Use mean of medians for prediction
		posterior_medians = [np.median(n) for n in posterior_dists]
		posterior_medians_series = pd.Series(posterior_medians)
		prediction = round(np.mean(posterior_medians_series[posterior_medians_series <= CAP]), 2)
		log_entry['Predicted Dwell'] = prediction

		# Reveal ground truth
		if index == list(df.index)[-1]:  # No ground truth if we are at the end of data-set (avoid index error)
			observed_dwtime = -1
		else:
			observed_dwtime = df.ix[index + 1]['DWTIME']

		# Plot medians to show uncertainty
		plt.title('Posterior Predictive Distribution of Dwell Times ' + note + '\nCentre Avenue at Aiken Avenue, inbound, 2013-10-28, {0}'.format(row['ARR'][-8:]))
		print(len(posterior_medians_series[posterior_medians_series <= CAP]))
		if(len(posterior_medians_series[posterior_medians_series <= CAP]) < 10):
			sns.kdeplot(posterior_medians_series, bw=0.15, label='PDF of Predicted Dwell')
		else:
			sns.kdeplot(posterior_medians_series[posterior_medians_series <= CAP], bw=0.15, label='PDF of Predicted Dwell')
		if prediction - observed_dwtime > 0:
			plt.plot([prediction, observed_dwtime], [0,0], color='orange', alpha=0.4, linewidth=3)
		else:
			plt.plot([prediction, observed_dwtime], [0,0], color='red', alpha=0.3, linewidth=3)
		plt.plot(prediction, [0], '*', markersize=10, color='blue', label='Prediction: {0} sec'.format(prediction))
		plt.plot(observed_dwtime, [0], '.', markersize=75, color='green', alpha=0.1)
		plt.plot(observed_dwtime, [0], '*', markersize=10, color='green', label='Observed: {0} sec'.format(observed_dwtime))
		plt.xlabel('Dwell Time (sec)')
		plt.xlim(0,CAP)
		plt.legend()
		if save:
			filename = row['ARR'] + '_ppd'
			plt.savefig(prediction_plots_dir + '/' + filename + '.svg')
			plt.savefig(prediction_plots_dir + '/' + filename + '.png')
		plt.show(block=False)
		plt.pause(2)
		plt.close()

	predictions.append(prediction)
	observations.append(observed_dwtime)
	hyper_param_1_ON_means.append(np.mean(trace['hyper_param_1_ON_{0}'.format(index)]))
	hyper_param_1_OFF_means.append(np.mean(trace['hyper_param_1_OFF_{0}'.format(index)]))
	hyper_param_2_ON_means.append(np.mean(trace['hyper_param_2_ON_{0}'.format(index)]))
	hyper_param_2_OFF_means.append(np.mean(trace['hyper_param_2_OFF_{0}'.format(index)]))
	hyper_param_1_ON_stds.append(np.std(trace['hyper_param_1_ON_{0}'.format(index)]))
	hyper_param_1_OFF_stds.append(np.std(trace['hyper_param_1_OFF_{0}'.format(index)]))
	hyper_param_2_ON_stds.append(np.std(trace['hyper_param_2_ON_{0}'.format(index)]))
	hyper_param_2_OFF_stds.append(np.std(trace['hyper_param_2_OFF_{0}'.format(index)]))

	if observed_dwtime != 'none':
		differences.append(prediction - observed_dwtime)
	else:
		differences.append(0)

	if len(log) >= 1:
		for key in log_keys:
			print(key, log[-1][key])

	log.append(log_entry)

	with open('centre_ave_aiken.json', 'w') as fp:
		json.dump(log, fp)

	# Plot Hyperparameter values thus far over simulation
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_ON_means, '-*', label='hyper_param_1_ON_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_ON_stds, '--', label='hyper_param_1_ON_stds',  marker='*',markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_OFF_means, '-*', label='hyper_param_1_OFF_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_OFF_stds, '--', label='hyper_param_1_OFF_stds', marker='*',markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_ON_means, '-*', label='hyper_param_2_ON_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_ON_stds, '--', label='hyper_param_2_ON_stds',  marker='*',markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_OFF_means, '-*', label='hyper_param_2_OFF_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_OFF_stds, '--', label='hyper_param_2_OFF_stds', marker='*',markersize=10)
	plt.legend()
	plt.title('Hyperparameters ' + note)
	plt.xlabel('Simulation Index')
	if save:
		plt.savefig(prediction_plots_dir + '/hyperparameters.svg')
		plt.savefig(prediction_plots_dir + '/hyperparameters.png')
	plt.show(block=False)
	plt.pause(1)
	plt.close()


	# Plot comparison
	plt.plot(range(len(predictions[:-1])), predictions[:-1], label='predictions')
	plt.plot(range(len(predictions[:-1])), observations[:-1], label='observations')
	plt.legend()
	plt.title('Dwell Time Predictions and Observations ' + note + '\nCentre Avenue at Aiken Avenue, inbound, 2013-10-28')
	plt.xlabel('Simulation Index')
	plt.ylabel('Dwell Time (s)')
	plt.fill_between(range(len(predictions[:-1])), predictions[:-1], observations[:-1], color='red', alpha=0.3)
	if save:
		plt.savefig(prediction_plots_dir + '/comparison.svg')
		plt.savefig(prediction_plots_dir + '/comparison.png')
	plt.show(block=False)
	plt.pause(0.1)
	plt.close()


#######################################################################################################################
# Plotting Simulation Results
#######################################################################################################################

pp = pprint.PrettyPrinter(width=4,indent=4)
pp.pprint(log)

# with open(log_dir + test_data_filepath.split('/')[-1].split('.')[0] + dt_string + 'filtered.json', 'w') as fp:
# 	json.dump(log, fp)

with open('centre_ave_aiken.json', 'w') as fp:
	json.dump(log, fp)

# Plot comparison
plt.plot(range(len(predictions[:-1])), predictions[:-1], label='predictions')
plt.plot(range(len(predictions[:-1])), observations[:-1], label='observations')
plt.legend()
plt.title('Dwell Time Predictions and Observations ' + note + '\nCentre Avenue at Aiken Avenue, inbound, 2013-10-28')
plt.xlabel('Simulation Index')
plt.ylabel('Dwell Time (s)')
plt.fill_between(range(len(predictions[:-1])), predictions[:-1], observations[:-1], color='red', alpha=0.3)
if save:
	plt.savefig(prediction_plots_dir + '/comparison.svg')
	plt.savefig(prediction_plots_dir + '/comparison.png')
plt.show()
plt.close()

# Plot error
plt.plot(range(len(differences[:-1])), differences[:-1], label='Prediction - Observed (s)')
plt.legend()
plt.title('Av. Abs. Er.: {0}; Av. Er.: {1} '.format(np.mean(np.abs(differences[:-1])), np.mean(differences[:-1])))
plt.xlabel('Simulation Index')
plt.ylabel('Dwell Time Error (s)')
if save:
	plt.savefig(prediction_plots_dir + '/err.svg')
	plt.savefig(prediction_plots_dir + '/err.png')
plt.show()
plt.close()

# Plot error dist
print(differences)
print(predictions)
mean = round(np.mean(differences[:-1]), 2)
med = round(np.median(differences[:-1]), 2)
std = round(np.std(differences[:-1]), 2)
abs_mean = round(np.mean(np.abs(differences[:-1])), 2)
abs_med = round(np.median(np.abs(differences[:-1])), 2)
print(mean, med, std, abs_mean, abs_med)
series = pd.Series(differences[:-1])
plt.hist(series)
plt.show()
plt.close()
sns.kdeplot(series, bw=0.3)
plt.plot(mean, [0], '*', markersize=5, label='Mean: ' + str(mean) + ' s')
plt.plot(abs_mean, [0], '*', markersize=5, label='Abs. Mean: ' + str(abs_mean) + ' s')
plt.title('KDE Distribution of Errors {0}\nmean {1} med {2} std {3} abs. mean {4} abs. med {5}'.format(note, mean, med, std, abs_mean, abs_med))
plt.xlabel('Prediction - Observed (s)')
plt.legend()
plt.savefig(prediction_plots_dir + '/err_dist.svg')
plt.savefig(prediction_plots_dir + '/err_dist.png')
plt.show()
plt.close()

# Plot hyperparameter values
plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_ON_means, '-*', label='hyper_param_1_ON_means', markersize=10)
plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_ON_stds, '--', label='hyper_param_1_ON_stds',  marker='*',markersize=10)
plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_OFF_means, '-*', label='hyper_param_1_OFF_means', markersize=10)
plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_OFF_stds, '--', label='hyper_param_1_OFF_stds', marker='*',markersize=10)
plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_ON_means, '-*', label='hyper_param_2_ON_means', markersize=10)
plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_ON_stds, '--', label='hyper_param_2_ON_stds',  marker='*',markersize=10)
plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_OFF_means, '-*', label='hyper_param_2_OFF_means', markersize=10)
plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_OFF_stds, '--', label='hyper_param_2_OFF_stds', marker='*',markersize=10)
plt.legend()
plt.title('Hyperparameters ' + note)
plt.xlabel('Simulation Index')
if save:
	plt.savefig(prediction_plots_dir + '/hyperparameters.svg')
	plt.savefig(prediction_plots_dir + '/hyperparameters.png')
plt.show()
plt.close()

print("Average absolute error: ", np.mean(np.abs(differences[:-1])))
print("Average error: ", np.mean(differences[:-1]))
	#######################################################################################################################
	# Simulation of Bayesian Dwell Time Model Assuming Error-free Covariates
	# Conor Igoe - 2017
	#
	# Loads relevant data from the test directory given by test_data_filepath
	# Initialises the necessary data structures for PyMC3 and logging
	# Steps through the loaded test data chronologically
	# For the 1st data point, with no posteriors to use as priors for the model, uses uniform priors
	# For all other data points, uses posteriors from the previous datapoint as priors for current point in simulation
	# Calculates the likelihood
	# Samples from the posteriors using MCMC
	# Logs and plots results
	# Samples from the Posterior Predictive Distribution and uses medians of MCMC samples to inform belief of dwell times
	# Predicts a dwell time by taing mean of the medians
	# Logs error between prediction and ground truth at current point in simulation
	# Plots the result of simulation: comparison of truth with prediction, residuals, dist. of errors, hyperparam values
	#######################################################################################################################


	import sys
	sys.path.insert(0, '../../data_management')
	sys.path.insert(0, '../../analysis')
	sys.path.insert(0, '../')
	from modelling_utils import *
	from utils import *
	from datetime import datetime
	import os

	dt_string = datetime.now().strftime("_%Y_%m_%d_%H_%M_%S")

	pp = pprint.PrettyPrinter(width=4,indent=4)

	# Load Data
	test_data_filepath = '../../data/TEST.csv'
	df = read_csvs([test_data_filepath])
	df = df[df['DWTIME'] > 0]
	df = df.sort_values('ARR').reset_index(drop=True)


	# test_data_filepath = []
	# for root,dirs,files in os.walk('../../data/long_test'):
	# for file in files:
	# if file.endswith(".csv"):
	# test_data_filepath.append('../../data/long_test/' + file)

	# df = read_csvs(test_data_filepath)
	# df = df[df['DWTIME'] > 0]
	# df = df.sort_values('ARR').reset_index(drop=True)
	# print('Length of dataframe: ', len(df))


	prediction_plots_dir = '../../assets/centre_ave_aiken'
	log_dir = '../logs/'
	log_suffix = 'centre_ave_aiken.json'
	note = '(centre_ave_aiken)'
	# Upper Limit for plotting
	CAP = 60

	# Flag to allow for cold starting
	flag = True
	save = True

	# Logging Data Structure
	log = [{'Arrival Time': 0, 'Observed Dwell': 0, 'Predicted Dwell': 0, 'Observed ON': 0, 'Estimated ON': 0, 'Observed OFF': 0, 'Estimated OFF': 0,
	'beta_1_ON mean': 0, 'beta_1_ON std': 0,'beta_1_OFF mean': 0, 'beta_1_OFF std': 0,
	'beta_2_ON mean': 0, 'beta_2_ON std': 0,'beta_2_OFF mean': 0, 'beta_2_OFF std': 0,
	'bus_route': 0}]

	log_keys = ['Arrival Time', 'Observed Dwell', 'Predicted Dwell', 'Observed ON', 'Estimated ON', 'Observed OFF', 'Estimated OFF',
	'beta_1_ON mean', 'beta_1_ON std', 'beta_1_OFF mean', 'beta_1_OFF std',
	'beta_2_ON mean', 'beta_2_ON std', 'beta_2_OFF mean', 'beta_2_OFF std',
	'bus_route']

	predictions = []
	observations = []
	differences = []

	hyper_param_1_ON_means = []
	hyper_param_1_OFF_means = []
	hyper_param_2_ON_means = []
	hyper_param_2_OFF_means = []
	hyper_param_1_ON_stds = []
	hyper_param_1_OFF_stds = []
	hyper_param_2_ON_stds = []
	hyper_param_2_OFF_stds = []


	#######################################################################################################################
	# Simulation
	#######################################################################################################################

	for index, row in df.iterrows():
	log_entry = {}
	print(index)

	# ON, OFF, DWTIME, LOAD = float(row['ON']), float(row['OFF']), float(row['DWTIME']), float(row['LOAD'])
	ON, OFF, DWTIME = float(row['ON']), float(row['OFF']), float(row['DWTIME'])

	log[-1]['Arrival Time'] = str(row['ARR'])
	log[-1]['Observed Dwell'] = DWTIME
	log[-1]['Observed ON'] = ON
	log[-1]['Observed OFF'] = OFF
	# log[-1]['Observed LOAD'] = LOAD

	# ON_shared, OFF_shared, LOAD_shared = shared(ON), shared(OFF), shared(LOAD)
	ON_shared, OFF_shared = shared(ON), shared(OFF)
	bus_route = str(row['ROUTE'])
	log[-1]['bus_route'] = bus_route

	with pm.Model() as basic_model:
	if flag: # no posterior yet - Use Normal as priors
	beta_1_ON = pm.HalfNormal('beta_1_ON_{0}'.format(index), sd=1)
	beta_1_OFF = pm.HalfNormal('beta_1_OFF_{0}'.format(index),sd=1)
	beta_2_ON = pm.HalfNormal('beta_2_ON_{0}'.format(index), sd=1)
	beta_2_OFF = pm.HalfNormal('beta_2_OFF_{0}'.format(index), sd=1)

	hyper_param_1_ON = pm.HalfNormal('hyper_param_1_ON_{0}'.format(index), sd=1)
	hyper_param_1_OFF = pm.HalfNormal('hyper_param_1_OFF_{0}'.format(index), sd=1)
	hyper_param_2_ON = pm.HalfNormal('hyper_param_2_ON_{0}'.format(index), sd=3)
	hyper_param_2_OFF = pm.HalfNormal('hyper_param_2_OFF_{0}'.format(index), sd=3)

	flag = False
	else:
	burnin = int(len(trace) / 5)

	beta_1_ON = from_posterior('beta_1_ON_{0}'.format(index), trace['beta_1_ON_{0}'.format(index - 1)][burnin:])
	beta_1_OFF = from_posterior('beta_1_OFF_{0}'.format(index), trace['beta_1_OFF_{0}'.format(index - 1)][burnin:])
	beta_2_ON = from_posterior('beta_2_ON_{0}'.format(index), trace['beta_2_ON_{0}'.format(index - 1)][burnin:])
	beta_2_OFF = from_posterior('beta_2_OFF_{0}'.format(index), trace['beta_2_OFF_{0}'.format(index - 1)][burnin:])

	hyper_param_1_ON = from_posterior('hyper_param_1_ON_{0}'.format(index), trace['hyper_param_1_ON_{0}'.format(index - 1)][burnin:])
	hyper_param_1_OFF = from_posterior('hyper_param_1_OFF_{0}'.format(index), trace['hyper_param_1_OFF_{0}'.format(index - 1)][burnin:])
	hyper_param_2_ON = from_posterior('hyper_param_2_ON_{0}'.format(index), trace['hyper_param_2_ON_{0}'.format(index - 1)][burnin:])
	hyper_param_2_OFF = from_posterior('hyper_param_2_OFF_{0}'.format(index), trace['hyper_param_2_OFF_{0}'.format(index - 1)][burnin:])

	# Likelihood
	mu = np.log(beta_1_ONnp.power(ON_shared, hyper_param_1_ON) + beta_1_OFFnp.power(OFF_shared, hyper_param_1_OFF))
	sd = np.log(beta_2_ONnp.power(ON_shared, hyper_param_2_ON) + beta_2_OFFnp.power(OFF_shared, hyper_param_2_OFF))

	DWELLS_obs = pm.Lognormal('DWELLS_obs_{0}'.format(index), mu=mu, sd=sd, observed=DWTIME)

	# Sample using MCMC
	step = pm.Metropolis([beta_1_ON, beta_1_OFF, beta_2_ON, beta_2_OFF, hyper_param_1_ON, hyper_param_1_OFF, hyper_param_2_ON, hyper_param_2_OFF])
	trace = pm.sample(1000, step=step)

	log_entry['beta_1_ON mean'] = np.mean(trace['beta_1_ON_{0}'.format(index)])
	log_entry['beta_1_ON std'] = np.std(trace['beta_1_ON_{0}'.format(index)])
	log_entry['beta_1_OFF mean'] = np.mean(trace['beta_1_OFF_{0}'.format(index)])
	log_entry['beta_1_OFF std'] = np.std(trace['beta_1_OFF_{0}'.format(index)])
	log_entry['beta_2_ON mean'] = np.mean(trace['beta_2_ON_{0}'.format(index)])
	log_entry['beta_2_ON std'] = np.std(trace['beta_2_ON_{0}'.format(index)])
	log_entry['beta_2_OFF mean'] = np.mean(trace['beta_2_OFF_{0}'.format(index)])
	log_entry['beta_2_OFF std'] = np.std(trace['beta_2_OFF_{0}'.format(index)])

	# Plot posterior distributions for coefficients
	_ = pm.traceplot(trace, figsize=(10,7))
	filename = row['ARR'] + '_posteriors'
	if save:
	plt.savefig(prediction_plots_dir + '/' + filename + '.svg')
	plt.savefig(prediction_plots_dir + '/' + filename + '.png')
	plt.show(block=False)
	plt.pause(0.5)
	plt.close()

	# Make prediction for next dwell time
	if index == list(df.index)[-1]:
	estimated_ON = float(1)
	estimated_OFF = float(1)
	observed_ON = estimated_ON
	observed_OFF = estimated_OFF
	else:
	df1 = df.iloc[[index + 1]].values[0]
	estimated_ON = float(df1[10])
	estimated_OFF = float(df1[11])
	observed_ON = estimated_ON
	observed_OFF = estimated_OFF

	ON_shared.set_value(estimated_ON)
	OFF_shared.set_value(estimated_OFF)

	log_entry['Estimated ON'] = float(estimated_ON)
	log_entry['Estimated OFF'] = float(estimated_OFF)

	# Sample again to find Posterior Predictive Distributino
	ppc = pm.sample_ppc(trace, samples=500)
	posterior_dists = ppc['DWELLS_obs_{0}'.format(index)]

	# Use mean of medians for prediction
	posterior_medians = [np.median(n) for n in posterior_dists]
	posterior_medians_series = pd.Series(posterior_medians)
	prediction = round(np.mean(posterior_medians_series[posterior_medians_series <= CAP]), 2)
	log_entry['Predicted Dwell'] = prediction

	# Reveal ground truth
	if index == list(df.index)[-1]: # No ground truth if we are at the end of data-set (avoid index error)
	observed_dwtime = -1
	else:
	observed_dwtime = df.ix[index + 1]['DWTIME']

	# Plot medians to show uncertainty
	plt.title('Posterior Predictive Distribution of Dwell Times ' + note + '\nCentre Avenue at Aiken Avenue, inbound, 2013-10-28, {0}'.format(row['ARR'][-8:]))
	print(len(posterior_medians_series[posterior_medians_series <= CAP]))
	if(len(posterior_medians_series[posterior_medians_series <= CAP]) < 10):
	sns.kdeplot(posterior_medians_series, bw=0.15, label='PDF of Predicted Dwell')
	else:
	sns.kdeplot(posterior_medians_series[posterior_medians_series <= CAP], bw=0.15, label='PDF of Predicted Dwell')
	if prediction - observed_dwtime > 0:
	plt.plot([prediction, observed_dwtime], [0,0], color='orange', alpha=0.4, linewidth=3)
	else:
	plt.plot([prediction, observed_dwtime], [0,0], color='red', alpha=0.3, linewidth=3)
	plt.plot(prediction, [0], '*', markersize=10, color='blue', label='Prediction: {0} sec'.format(prediction))
	plt.plot(observed_dwtime, [0], '.', markersize=75, color='green', alpha=0.1)
	plt.plot(observed_dwtime, [0], '*', markersize=10, color='green', label='Observed: {0} sec'.format(observed_dwtime))
	plt.xlabel('Dwell Time (sec)')
	plt.xlim(0,CAP)
	plt.legend()
	if save:
	filename = row['ARR'] + '_ppd'
	plt.savefig(prediction_plots_dir + '/' + filename + '.svg')
	plt.savefig(prediction_plots_dir + '/' + filename + '.png')
	plt.show(block=False)
	plt.pause(2)
	plt.close()

	predictions.append(prediction)
	observations.append(observed_dwtime)
	hyper_param_1_ON_means.append(np.mean(trace['hyper_param_1_ON_{0}'.format(index)]))
	hyper_param_1_OFF_means.append(np.mean(trace['hyper_param_1_OFF_{0}'.format(index)]))
	hyper_param_2_ON_means.append(np.mean(trace['hyper_param_2_ON_{0}'.format(index)]))
	hyper_param_2_OFF_means.append(np.mean(trace['hyper_param_2_OFF_{0}'.format(index)]))
	hyper_param_1_ON_stds.append(np.std(trace['hyper_param_1_ON_{0}'.format(index)]))
	hyper_param_1_OFF_stds.append(np.std(trace['hyper_param_1_OFF_{0}'.format(index)]))
	hyper_param_2_ON_stds.append(np.std(trace['hyper_param_2_ON_{0}'.format(index)]))
	hyper_param_2_OFF_stds.append(np.std(trace['hyper_param_2_OFF_{0}'.format(index)]))

	if observed_dwtime != 'none':
	differences.append(prediction - observed_dwtime)
	else:
	differences.append(0)

	if len(log) >= 1:
	for key in log_keys:
	print(key, log[-1][key])

	log.append(log_entry)

	with open('centre_ave_aiken.json', 'w') as fp:
	json.dump(log, fp)

	# Plot Hyperparameter values thus far over simulation
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_ON_means, '-*', label='hyper_param_1_ON_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_ON_stds, '--', label='hyper_param_1_ON_stds', marker='*',markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_OFF_means, '-*', label='hyper_param_1_OFF_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_OFF_stds, '--', label='hyper_param_1_OFF_stds', marker='*',markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_ON_means, '-*', label='hyper_param_2_ON_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_ON_stds, '--', label='hyper_param_2_ON_stds', marker='*',markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_OFF_means, '-*', label='hyper_param_2_OFF_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_OFF_stds, '--', label='hyper_param_2_OFF_stds', marker='*',markersize=10)
	plt.legend()
	plt.title('Hyperparameters ' + note)
	plt.xlabel('Simulation Index')
	if save:
	plt.savefig(prediction_plots_dir + '/hyperparameters.svg')
	plt.savefig(prediction_plots_dir + '/hyperparameters.png')
	plt.show(block=False)
	plt.pause(1)
	plt.close()


	# Plot comparison
	plt.plot(range(len(predictions[:-1])), predictions[:-1], label='predictions')
	plt.plot(range(len(predictions[:-1])), observations[:-1], label='observations')
	plt.legend()
	plt.title('Dwell Time Predictions and Observations ' + note + '\nCentre Avenue at Aiken Avenue, inbound, 2013-10-28')
	plt.xlabel('Simulation Index')
	plt.ylabel('Dwell Time (s)')
	plt.fill_between(range(len(predictions[:-1])), predictions[:-1], observations[:-1], color='red', alpha=0.3)
	if save:
	plt.savefig(prediction_plots_dir + '/comparison.svg')
	plt.savefig(prediction_plots_dir + '/comparison.png')
	plt.show(block=False)
	plt.pause(0.1)
	plt.close()


	#######################################################################################################################
	# Plotting Simulation Results
	#######################################################################################################################

	pp = pprint.PrettyPrinter(width=4,indent=4)
	pp.pprint(log)

	# with open(log_dir + test_data_filepath.split('/')[-1].split('.')[0] + dt_string + 'filtered.json', 'w') as fp:
	# json.dump(log, fp)

	with open('centre_ave_aiken.json', 'w') as fp:
	json.dump(log, fp)

	# Plot comparison
	plt.plot(range(len(predictions[:-1])), predictions[:-1], label='predictions')
	plt.plot(range(len(predictions[:-1])), observations[:-1], label='observations')
	plt.legend()
	plt.title('Dwell Time Predictions and Observations ' + note + '\nCentre Avenue at Aiken Avenue, inbound, 2013-10-28')
	plt.xlabel('Simulation Index')
	plt.ylabel('Dwell Time (s)')
	plt.fill_between(range(len(predictions[:-1])), predictions[:-1], observations[:-1], color='red', alpha=0.3)
	if save:
	plt.savefig(prediction_plots_dir + '/comparison.svg')
	plt.savefig(prediction_plots_dir + '/comparison.png')
	plt.show()
	plt.close()

	# Plot error
	plt.plot(range(len(differences[:-1])), differences[:-1], label='Prediction - Observed (s)')
	plt.legend()
	plt.title('Av. Abs. Er.: {0}; Av. Er.: {1} '.format(np.mean(np.abs(differences[:-1])), np.mean(differences[:-1])))
	plt.xlabel('Simulation Index')
	plt.ylabel('Dwell Time Error (s)')
	if save:
	plt.savefig(prediction_plots_dir + '/err.svg')
	plt.savefig(prediction_plots_dir + '/err.png')
	plt.show()
	plt.close()

	# Plot error dist
	print(differences)
	print(predictions)
	mean = round(np.mean(differences[:-1]), 2)
	med = round(np.median(differences[:-1]), 2)
	std = round(np.std(differences[:-1]), 2)
	abs_mean = round(np.mean(np.abs(differences[:-1])), 2)
	abs_med = round(np.median(np.abs(differences[:-1])), 2)
	print(mean, med, std, abs_mean, abs_med)
	series = pd.Series(differences[:-1])
	plt.hist(series)
	plt.show()
	plt.close()
	sns.kdeplot(series, bw=0.3)
	plt.plot(mean, [0], '*', markersize=5, label='Mean: ' + str(mean) + ' s')
	plt.plot(abs_mean, [0], '*', markersize=5, label='Abs. Mean: ' + str(abs_mean) + ' s')
	plt.title('KDE Distribution of Errors {0}\nmean {1} med {2} std {3} abs. mean {4} abs. med {5}'.format(note, mean, med, std, abs_mean, abs_med))
	plt.xlabel('Prediction - Observed (s)')
	plt.legend()
	plt.savefig(prediction_plots_dir + '/err_dist.svg')
	plt.savefig(prediction_plots_dir + '/err_dist.png')
	plt.show()
	plt.close()

	# Plot hyperparameter values
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_ON_means, '-*', label='hyper_param_1_ON_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_ON_stds, '--', label='hyper_param_1_ON_stds', marker='*',markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_OFF_means, '-*', label='hyper_param_1_OFF_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_1_OFF_stds, '--', label='hyper_param_1_OFF_stds', marker='*',markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_ON_means, '-*', label='hyper_param_2_ON_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_ON_stds, '--', label='hyper_param_2_ON_stds', marker='*',markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_OFF_means, '-*', label='hyper_param_2_OFF_means', markersize=10)
	plt.plot(range(len(hyper_param_1_ON_means)), hyper_param_2_OFF_stds, '--', label='hyper_param_2_OFF_stds', marker='*',markersize=10)
	plt.legend()
	plt.title('Hyperparameters ' + note)
	plt.xlabel('Simulation Index')
	if save:
	plt.savefig(prediction_plots_dir + '/hyperparameters.svg')
	plt.savefig(prediction_plots_dir + '/hyperparameters.png')
	plt.show()
	plt.close()

	print("Average absolute error: ", np.mean(np.abs(differences[:-1])))
	print("Average error: ", np.mean(differences[:-1]))