Last active
November 20, 2018 16:23
-
-
Save newquant1/28277edc6713cc92ff9e01e751ac1c3b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymc3 as pm | |
import os | |
import pandas as pd | |
import numpy as np | |
from theano import theano, scalar, tensor as tt | |
from tqdm import tqdm | |
def _expand_chol(n, var, od): | |
sq = [[0.]*n for _ in range(n)] | |
var_ix, od_ix = 0, 0 | |
for i in range(n): | |
for j in range(0, i+1): | |
if i==j: | |
sq[i][j] = var[var_ix] | |
var_ix+=1 | |
else: | |
sq[i][j] = od[od_ix] | |
od_ix+=1 | |
return sq | |
def expand_chol(n, var, od):
    """Build an n x n lower-triangular theano tensor from the packed
    diagonal (``var``) and strict-lower (``od``) entries.

    Each row of the nested-list layout from ``_expand_chol`` is first
    converted to a tensor, then the rows are stacked into one tensor.
    """
    rows = _expand_chol(n, var, od)
    tensor_rows = [tt.as_tensor(row) for row in rows]
    return tt.as_tensor(tensor_rows)
def build_basic_rw_model(observations):
    """Build a PyMC3 model with a time-varying covariance matrix driven by
    Gaussian random walks on the Cholesky factor of the covariance.

    Parameters
    ----------
    observations : pandas object with ``.shape == (total_time, n_secs)``
        and ``.values`` — presumably a DataFrame of per-security returns,
        one row per time step. TODO confirm against caller.

    Returns
    -------
    tuple
        ``(bayesian_cov, reshaped_returns)`` — the pm.Model and the raw
        observation array. NOTE(review): callers must unpack; the model
        itself is the first element.
    """
    subsample_rate=1
    total_time, n_secs = observations.shape
    with pm.Model() as bayesian_cov:
        # Random walk on log-variances (one walk per security) — exp()
        # below keeps the diagonal of the Cholesky factor positive.
        log_var = pm.GaussianRandomWalk(
            "log_var",
            mu=0,
            sd=.1,
            shape=(total_time//subsample_rate, n_secs),
        )
        var = tt.exp(log_var)
        # Random walk on the n*(n-1)/2 strict-lower Cholesky entries.
        lower_chol = pm.GaussianRandomWalk(
            "lower_chol",
            mu=0,
            sd=.1,
            shape=(total_time//subsample_rate, n_secs*(n_secs-1)//2)
        )
        # Assemble one lower-triangular Cholesky factor per time step.
        cholesky = []
        print("doing choleskys")
        for t in tqdm(range(total_time//subsample_rate)):
            cholesky.append(
                expand_chol(n_secs, var[t,:].flatten(), lower_chol[t,:].flatten())
            )
        cholesky = tt.as_tensor(cholesky)
        print("cholesky done")
        print('doing covariance')
        # Covariance at each step: L @ L.T — tracked as a Deterministic
        # so it is recorded in the trace.
        covariance = []
        for t in tqdm(range(total_time//subsample_rate)):
            covariance.append(cholesky[t].dot(cholesky[t].T))
        covariance = pm.Deterministic('covariance', tt.as_tensor(covariance))
        #
        # Earlier subsampling scheme, kept for reference:
        # reshaped_returns = observations.values[
        #     :(subsample_rate*(total_time//subsample_rate))
        # ].reshape(total_time//subsample_rate, subsample_rate, n_secs)
        reshaped_returns = observations.values
        time_segments = reshaped_returns.shape[0]
        print("doing likelyhoods")
        print("n time segments", time_segments)
        print("single obs")
        # One MvNormal likelihood per time step, each with its own
        # Cholesky factor and that step's observed return vector.
        for t in tqdm(range(time_segments)):
            pm.MvNormal(
                'likelihood_{0}'.format(t),
                mu=np.zeros(n_secs),
                chol=cholesky[t,:,:],
                observed=reshaped_returns[t, :]
            )
    return bayesian_cov, reshaped_returns
if __name__ == "__main__":
    # Load per-ETF Yahoo Finance CSVs, compute daily log returns, and fit
    # the random-walk covariance model on the last trading year.
    dfs = []
    etfpath = "/path/to/some/yahoofinance/etfs/"
    for fname in os.listdir(etfpath):
        df = pd.read_csv(os.path.join(etfpath, fname))
        df.index = pd.to_datetime(df['Date'])
        # Daily log return: log(P_t / P_{t-1}); the first row becomes NaN
        # and is dropped below.
        df[fname] = np.log(df['Adj Close'].div(df['Adj Close'].shift(1)))
        dfs.append(df[fname])
    df = pd.concat(dfs, axis=1)
    # Keep rows where every ETF has a return (post-2006 intersection).
    df = df[df.index > pd.to_datetime("2006-01-01")].dropna()
    df = df.tail(252)  # roughly one trading year
    # BUG FIX: build_basic_rw_model returns (model, reshaped_returns);
    # the original bound the whole tuple to `model`, so `with model:`
    # failed. Unpack and keep only the model.
    model, _ = build_basic_rw_model(df)
    with model:
        trace = pm.sample(1000, tune=1000, njobs=2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment