import os

import numpy as np
import pandas as pd
import pymc3 as pm
from theano import tensor as tt
from tqdm import tqdm

def _expand_chol(n, var, od):
    """Arrange diagonal entries (var) and strictly-lower-triangular entries (od)
    into an n x n lower-triangular matrix, as a nested list of scalars."""
    sq = [[0.] * n for _ in range(n)]
    var_ix, od_ix = 0, 0
    for i in range(n):
        for j in range(0, i + 1):
            if i == j:
                sq[i][j] = var[var_ix]
                var_ix += 1
            else:
                sq[i][j] = od[od_ix]
                od_ix += 1
    return sq


def expand_chol(n, var, od):
    """Stack the nested list from _expand_chol into a Theano matrix."""
    sq = _expand_chol(n, var, od)
    for i in range(n):
        sq[i] = tt.as_tensor(sq[i])
    return tt.as_tensor(sq)
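
# Illustrative sanity check (not part of the original gist): with unit diagonal
# entries and zero off-diagonals, expand_chol should reproduce the identity,
# e.g. for three securities (3 diagonal and 3 lower-triangular slots):
#
#     expand_chol(3, tt.ones(3), tt.zeros(3)).eval()
#     # -> [[1., 0., 0.],
#     #     [0., 1., 0.],
#     #     [0., 0., 1.]]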

def build_basic_rw_model(observations):
    """Time-varying covariance model: the log of each Cholesky diagonal entry
    and each lower-triangular entry follows its own Gaussian random walk."""
    subsample_rate = 1
    total_time, n_secs = observations.shape
    with pm.Model() as bayesian_cov:
        # Random walk on the log of the Cholesky diagonal (keeps it positive).
        log_var = pm.GaussianRandomWalk(
            "log_var",
            mu=0,
            sd=.1,
            shape=(total_time // subsample_rate, n_secs),
        )
        var = tt.exp(log_var)
        # Random walk on the n*(n-1)/2 strictly-lower-triangular entries.
        lower_chol = pm.GaussianRandomWalk(
            "lower_chol",
            mu=0,
            sd=.1,
            shape=(total_time // subsample_rate, n_secs * (n_secs - 1) // 2),
        )
        cholesky = []
        print("doing choleskys")
        for t in tqdm(range(total_time // subsample_rate)):
            cholesky.append(
                expand_chol(n_secs, var[t, :].flatten(), lower_chol[t, :].flatten())
            )
        cholesky = tt.as_tensor(cholesky)
        print("cholesky done")
        print('doing covariance')
        # Covariance at each time step is L_t L_t^T.
        covariance = []
        for t in tqdm(range(total_time // subsample_rate)):
            covariance.append(cholesky[t].dot(cholesky[t].T))
        covariance = pm.Deterministic('covariance', tt.as_tensor(covariance))
        #
        # reshaped_returns = observations.values[
        #     :(subsample_rate * (total_time // subsample_rate))
        # ].reshape(total_time // subsample_rate, subsample_rate, n_secs)
        reshaped_returns = observations.values
        time_segments = reshaped_returns.shape[0]
        print("doing likelihoods")
        print("n time segments", time_segments)
        print("single obs")
        # One multivariate-normal likelihood per time step, parameterised by
        # that step's Cholesky factor.
        for t in tqdm(range(time_segments)):
            pm.MvNormal(
                'likelihood_{0}'.format(t),
                mu=np.zeros(n_secs),
                chol=cholesky[t, :, :],
                observed=reshaped_returns[t, :],
            )
    return bayesian_cov, reshaped_returns
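
# Minimal usage sketch (illustrative, not from the original gist): build the
# model on a small synthetic return panel just to check the graph compiles.
# The names below are hypothetical.
#
#     fake_returns = pd.DataFrame(np.random.randn(20, 3) * 0.01)
#     toy_model, toy_obs = build_basic_rw_model(fake_returns)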

if __name__ == "__main__":
    dfs = []
    etfpath = "/path/to/some/yahoofinance/etfs/"
    for f in os.listdir(etfpath):
        df = pd.read_csv(etfpath + f)
        df.index = pd.to_datetime(df['Date'])
        # Daily log returns from the adjusted close.
        df[f] = np.log(df['Adj Close'].div(df['Adj Close'].shift(1)))
        # print(df.head())
        dfs.append(df[f])
    df = pd.concat(dfs, axis=1)
    df = df[df.index > pd.to_datetime("2006-01-01")].dropna()
    df = df.tail(252)
    model, reshaped_returns = build_basic_rw_model(df)
    with model:
        trace = pm.sample(1000, tune=1000, njobs=2)
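    # A rough sketch of how the fitted time-varying covariance could be
    # inspected afterwards (assumes sampling completed; the variable names
    # below are illustrative, not part of the original gist):
    #
    #     posterior_cov = trace['covariance'].mean(axis=0)  # (T, n_secs, n_secs)
    #     print(posterior_cov[-1])                           # latest covariance estimate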