Create a gist now

Instantly share code, notes, and snippets.

# Backtesting Algo Strategies based on
# Logistic Regression with scikit-learn
# Yves Hilpisch
# ODSC London 2016
# The Python Quants GmbH
import numpy as np
import pandas as pd
import seaborn as sns; sns.set()
from pandas_datareader import data as web
from sklearn import linear_model
class ScikitBacktest(object):
    """Backtest a direction-prediction strategy based on logistic regression.

    The model is fitted on the signs of log returns, using the previous
    ``lags`` log returns as features.  The predicted sign for each day is
    then used as the position (+1 long / -1 short) for that day.
    """

    def __init__(self, sym):
        """Store the symbol and set up the classifier.

        Parameters
        ----------
        sym : str
            Ticker symbol passed to the data reader.
        """
        self.lags = 5
        self.symbol = sym
        # Price data is loaded on first use (see ``select_data``) so that
        # constructing the object does not require network access.
        self.data = None
        self.lm = linear_model.LogisticRegression(C=1e3)

    def get_data(self):
        """Download adjusted close prices and compute daily log returns.

        The result is stored in ``self.data`` as a DataFrame with two
        columns: the symbol itself (prices) and ``'returns'``.
        """
        # NOTE(review): whether the 'yahoo' source still works depends on
        # the installed pandas_datareader version -- confirm before use.
        d = web.DataReader(self.symbol, data_source='yahoo')['Adj Close']
        d = pd.DataFrame(d)
        d.columns = [self.symbol]
        d['returns'] = np.log(d[self.symbol] / d[self.symbol].shift(1))
        # The first return is NaN by construction (no previous price); drop
        # it so it cannot end up in the feature matrix.
        self.data = d.dropna()

    def select_data(self, start, end):
        """Return a copy of the rows whose index lies in ``[start, end]``.

        Parameters
        ----------
        start, end
            Inclusive bounds compared against ``self.data.index``.

        Returns
        -------
        pandas.DataFrame
            An independent copy, safe to modify.
        """
        if getattr(self, 'data', None) is None:
            self.get_data()
        in_window = (self.data.index >= start) & (self.data.index <= end)
        return self.data[in_window].copy()

    def get_matrix(self, start, end):
        """Build the lagged-returns matrix for the given window.

        ``self.matrix`` gets shape ``(lags + 1, n - lags)`` where ``n`` is
        the number of rows in the window.  For column ``j``, rows
        ``0 .. lags - 1`` hold the returns at positions ``j .. j + lags - 1``
        (the features) and row ``lags`` holds the return at position
        ``j + lags`` (the value to be predicted).

        Raises
        ------
        ValueError
            If the window does not contain more than ``lags`` observations.
        """
        d = self.select_data(start, end)
        returns = d['returns'].values
        n = len(returns)
        if n <= self.lags:
            raise ValueError(
                'need more than %d observations between %s and %s, got %d'
                % (self.lags, start, end, n))
        m = np.zeros((self.lags + 1, n - self.lags))
        for i in range(self.lags + 1):
            # An explicit end index avoids the ``[i:0]`` empty-slice trap
            # that a negative offset produces for the last row.
            m[i] = returns[i:n - self.lags + i]
        self.matrix = m

    def fit_model(self, start, end):
        """Fit the classifier on the window ``[start, end]``.

        Features are the lagged returns; the target is the sign of the
        following return.
        """
        self.get_matrix(start, end)
        self.lm.fit(self.matrix[:self.lags].T,
                    np.sign(self.matrix[self.lags]))

    def predict_moves(self, start, end):
        """Predict the return sign for each day after the first ``lags`` days.

        Returns
        -------
        numpy.ndarray
            One prediction per column of ``self.matrix``.
        """
        self.get_matrix(start, end)
        pred = self.lm.predict(self.matrix[:self.lags].T)
        return pred

    def run_strategy(self, start_tr, end_tr, start_te, end_te, lags=None):
        """Fit on the training window, trade on the test window, and plot.

        Parameters
        ----------
        start_tr, end_tr
            Inclusive bounds of the training window.
        start_te, end_te
            Inclusive bounds of the test window.
        lags : int, optional
            Number of lagged returns used as features.  When omitted, the
            current value of ``self.lags`` is kept.
        """
        if lags is not None:
            self.lags = lags
        self.fit_model(start_tr, end_tr)
        pred = self.predict_moves(start_te, end_te)
        d = self.select_data(start_te, end_te)
        d['pred'] = 0.0
        # No prediction exists for the first ``lags`` rows; they stay at 0.
        # Positional assignment replaces the removed ``.ix`` indexer and
        # avoids writing to a temporary copy.
        d.iloc[self.lags:, d.columns.get_loc('pred')] = pred
        d['strategy'] = d['pred'] * d['returns']
        title = '%s to %s for %d lags' % (start_te, end_te, self.lags)
        # Summing log returns and exponentiating gives gross performance.
        performance = d[['returns', 'strategy']].iloc[self.lags:]
        performance.cumsum().apply(np.exp).plot(title=title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment