Skip to content

Instantly share code, notes, and snippets.

@QuantTraderEd
Last active August 29, 2015 14:20
Show Gist options
  • Save QuantTraderEd/3062b939426859cde468 to your computer and use it in GitHub Desktop.
Save QuantTraderEd/3062b939426859cde468 to your computer and use it in GitHub Desktop.
EigenPort
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 30 12:44:08 2015
@author: assa
"""
import numpy as np
import pandas as pd
import pandas.io.data as web
import datetime as dt
from numpy import linalg as LA
from read_shortcd import read_shortcd
def runPCA(df_ret):
df_norm_ret = (df_ret - df_ret.mean()) / (df_ret.std())
emp_corr = df_norm_ret.corr()
w, v = LA.eig(emp_corr)
eigen_index = w.argmin()
weight = v[:,eigen_index] / df_ret.std()
weight = weight / sum(weight)
print '-------- weight -----------'
print weight
return weight
pass
def runOptPCA(df_ret):
df_norm_ret = (df_ret - df_ret.mean()) / (df_ret.std())
df_norm_ret = df_norm_ret.fillna(0)
df_ret_std = df_ret.std()
df_ret_std = df_ret_std.fillna(0.1)
emp_corr = df_norm_ret.corr()
emp_corr.values[[np.arange(len(emp_corr))]*2] = 1.0
emp_corr = emp_corr.fillna(0)
w, v = LA.eig(emp_corr)
eigen_index = w.argmin()
max_sr = -9999999
max_eigen_index = w.argmin()
for i in xrange(len(w)):
try:
eigen_index = i
weight = v[:,eigen_index] / df_ret_std
weight[weight < 0] = 0 # no short selling
weight = weight / sum(weight)
df_port = df_ret * weight
df_port['Port_Ret'] = df_port.sum(axis=1)
sr = df_port['Port_Ret'].mean() / df_port['Port_Ret'].std()
ret_mean = df_port['Port_Ret'].mean()
ret_std = df_port['Port_Ret'].std()
#print 'Sharpe ratio: ', eigen_index ,sr
if max_sr < sr:
max_sr = sr
max_eigen_index = i
except Warning as e:
print 'index of w: ', i
#print 'ret_mean: ', ret_mean, 'ret_std: ', ret_std
#print 'max_sr: ', max_sr, ' max_eigen_index: ', max_eigen_index
weight = v[:,max_eigen_index] / df_ret_std
weight[weight < 0] = 0 # no short selling
weight = weight / sum(weight)
return weight
pass
start = dt.datetime(2010, 1, 2)
end = dt.datetime(2015, 4, 30)
shortcd_lst = ['A139230',
'A139290',
'A139280',
'A139270',
'A139260',
'A139250',
'A139240',
'A139220',
'A069500',
'A114800'
]
shortcd_lst = [
'A143860',
'A140710',
'A140700',
'A117700',
'A117680',
'A117460',
'A102960',
'A102970',
'A098560',
'A091160',
'A091170',
'A091180',
'A069500',
'A114800'
]
shortcd_lst = read_shortcd('index_ETF.text')
shortcd_yahoo_lst = [item[1:] + '.KS' for item in shortcd_lst]
df_stock = web.DataReader(shortcd_yahoo_lst, 'yahoo', start, end)
df_stock.to_hdf('df_etf1.h5','table')
df_stock = pd.read_hdf('df_etf1.h5','table')
df_volume = df_stock['Volume'].copy()
df_volume_des = df_volume.describe()
df_volume_mean = df_volume_des.mean()
df_volume_mean.sort(ascending=False)
df_liqudity = df_volume_mean[df_volume_mean > 50000]
selected_ETF = list(df_liqudity.index)
df = df_stock['Adj Close'].copy()
df = df[selected_ETF]
df = df.fillna(method='pad')
#df.columns = shortcd_lst
df = df['2014-05-01':]
for i in xrange(len(df.columns)):
df[df.columns[i] + '_Ret'] = df[df.columns[i]].pct_change()
df_ret = df[df.columns[len(selected_ETF):]]
df = df[df.columns[:len(selected_ETF)]]
df_norm_ret = (df_ret - df_ret.mean()) / (df_ret.std())
emp_corr = df_norm_ret.corr()
w, v = LA.eig(emp_corr)
#eigen_index = 6
eigen_index = w.argmax()
print w[eigen_index]
print v[:,eigen_index]
weight = v[:,eigen_index] / df_ret.std()
weight = weight / sum(weight)
print weight
#------ Test -----------
df_port = df_ret * weight
df_port['Port_Ret'] = df_port.sum(axis=1)
s = df_port['Port_Ret'] + 1
s_cum = s.cumprod()
s_cum.plot()
#------ Apply Fee & Tax --------
df = df[df.columns[:len(shortcd_lst)]]
weight.index = df.columns
df_qty = weight / df
df_qty_diff = df_qty.diff()
df_fee = abs(df_qty_diff) * df * 0.00015
df_fee['fee'] = df_fee.sum(axis=1)
df_qty_diff_negative = df_qty_diff.copy()
df_qty_diff_negative[df_qty_diff_negative > 0] = 0
df_tax = abs(df_qty_diff_negative) * df * 0.0030
df_tax['tax'] = df_tax.sum(axis=1)
s = df_port['Port_Ret'] + 1 - df_fee['fee'] - df_tax['tax']
s_cum = s.cumprod()
s_cum.plot()
#------Sharpe ratio optimize -----------
max_sr = -9999999
max_eigen_index = w.argmin()
for i in xrange(len(w)):
eigen_index = i
weight = v[:,eigen_index] / df_ret.std()
weight = weight / sum(weight)
df_port = df_ret * weight
df_port['Port_Ret'] = df_port.sum(axis=1)
df_port['Port_Ret'] = df_port['Port_Ret'] - df_fee['fee'] - df_tax['tax']
sr = df_port['Port_Ret'].mean() / df_port['Port_Ret'].std()
print 'Sharpe ratio: ', eigen_index ,sr
if max_sr < sr:
max_sr = sr
max_eigen_index = i
print 'max_sr: ', max_sr, ' max_eigen_index: ', max_eigen_index
#-------- optimizing back test ---------
prd = 41
hrz = 20
#df_port = pd.DataFrame(columns=df_ret.columns, index=df_ret.index[prd:len(df_ret)])
#for i in xrange(prd,len(df_ret)):
# df_sample = df_ret[i-prd:i-1]
# weight = runOptPCA(df_sample)
# df_port_new = list(df_ret.iloc[i]) * weight
# df_port.iloc[i-prd] = df_port_new
df_port = pd.DataFrame()
weight_prev = pd.Series()
for i in xrange(prd,len(df_ret),hrz):
df_ret_sample = df_ret[i-prd:i-1]
df_sample = df[i-prd:i-1]
try:
weight = runOptPCA(df_ret_sample)
except Warning as e:
print 'index of prd: ', i
df_port_new = df_ret[i:i+hrz] * weight
df_port_new['Port_Ret'] = df_port_new.sum(axis=1)
weight.index = df.columns
if i == prd:
weight_diff = weight.copy()
else:
weight_diff = weight - weight_prev
weight_fee = abs(weight_diff) * 0.00015
weight_negative = weight_diff[weight_diff < 0]
weight_tax = abs(weight_negative) * 0.0030
weight_prev = weight.copy()
df_qty = weight / df
df_qty_diff = df_qty.diff()
df_fee = abs(df_qty_diff) * df * 0.00015
df_fee['fee'] = df_fee.sum(axis=1)
df_qty_diff_negative = df_qty_diff.copy()
df_qty_diff_negative[df_qty_diff_negative > 0] = 0
df_tax = abs(df_qty_diff_negative) * df * 0.0030
df_tax['tax'] = df_tax.sum(axis=1)
df_port_new['Port_Ret'] = df_port_new['Port_Ret'] - df_fee['fee'] - df_tax['tax']
df_port_new['Port_Ret'][0] = df_port_new['Port_Ret'][0] - sum(weight_fee) - sum(weight_tax)
if len(df_port) > 0:
df_port = df_port.append(df_port_new)
else:
df_port = df_port_new.copy()
#df_port['Port_Ret'] = df_port.sum(axis=1)
s = df_port['Port_Ret'] + 1
s_cum = s.cumprod()
s_cum.plot()
s_cum_month = s_cum.resample('M')
s_cum_month_pct = s_cum_month.pct_change()
# s_cum_month_diff.index = s_cum_month_diff.index.to_period('M')
# s_cum_month_diff.plot(kind='bar')
#------- optimzie parm --------
# prd_lst = [21, 41, 61, 121, 251]
prd_lst = [21, 41, 61, 121]
hrz_lst = [5 ,10, 20]
for prd in prd_lst:
for hrz in hrz_lst:
df_port = pd.DataFrame()
weight_prev = pd.Series()
for i in xrange(prd,len(df_ret),hrz):
df_sample = df_ret[i-prd:i-1]
weight = runOptPCA(df_sample)
df_port_new = df_ret[i:i+hrz] * weight
df_port_new['Port_Ret'] = df_port_new.sum(axis=1)
weight.index = df.columns
if i == prd:
weight_diff = weight.copy()
else:
weight_diff = weight - weight_prev
weight_fee = abs(weight_diff) * 0.00015
weight_negative = weight_diff[weight_diff < 0]
weight_tax = abs(weight_negative) * 0.0030
weight_prev = weight.copy()
df_qty = weight / df
df_qty_diff = df_qty.diff()
df_fee = abs(df_qty_diff) * df * 0.00015
df_fee['fee'] = df_fee.sum(axis=1)
df_qty_diff_negative = df_qty_diff.copy()
df_qty_diff_negative[df_qty_diff_negative > 0] = 0
df_tax = abs(df_qty_diff_negative) * df * 0.0030
df_tax['tax'] = df_tax.sum(axis=1)
df_port_new['Port_Ret'] = df_port_new['Port_Ret'] - df_fee['fee'] - df_tax['tax']
df_port_new['Port_Ret'][0] = df_port_new['Port_Ret'][0] - sum(weight_fee) - sum(weight_tax)
if len(df_port) > 0:
df_port = df_port.append(df_port_new)
else:
df_port = df_port_new.copy()
#df_port['Port_Ret'] = df_port.sum(axis=1)
s = df_port['Port_Ret'] + 1
sr = df_port['Port_Ret'].mean() / df_port['Port_Ret'].std()
print 'prd: %d, hrz: %d, SR: %.4f' % (prd, hrz, sr)
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 30 12:44:08 2015
@author: assa
"""
import numpy as np
import pandas as pd
import pandas.io.data as web
import datetime as dt
from numpy import linalg as LA
from read_shortcd import read_shortcd
from read_dbdata import getdbdata
def runPCA(df_ret):
df_norm_ret = (df_ret - df_ret.mean()) / (df_ret.std())
emp_corr = df_norm_ret.corr()
w, v = LA.eig(emp_corr)
eigen_index = w.argmin()
weight = v[:,eigen_index] / df_ret.std()
weight = weight / sum(weight)
print '-------- weight -----------'
print weight
return weight
pass
def runOptPCA(df_ret):
df_norm_ret = (df_ret - df_ret.mean()) / (df_ret.std())
df_norm_ret = df_norm_ret.fillna(0)
df_ret_std = df_ret.std()
df_ret_std = df_ret_std.fillna(0.1)
emp_corr = df_norm_ret.corr()
emp_corr.values[[np.arange(len(emp_corr))]*2] = 1.0
emp_corr = emp_corr.fillna(0)
w, v = LA.eig(emp_corr)
eigen_index = w.argmin()
max_sr = -9999999
max_eigen_index = w.argmin()
for i in xrange(len(w)):
try:
eigen_index = i
weight = v[:,eigen_index] / df_ret_std
weight[weight < 0] = 0 # no short selling
weight = weight / sum(weight)
df_port = df_ret * weight
df_port['Port_Ret'] = df_port.sum(axis=1)
sr = df_port['Port_Ret'].mean() / df_port['Port_Ret'].std()
ret_mean = df_port['Port_Ret'].mean()
ret_std = df_port['Port_Ret'].std()
#print 'Sharpe ratio: ', eigen_index ,sr
if max_sr < sr:
max_sr = sr
max_eigen_index = i
except Warning as e:
print 'index of w: ', i
#print 'ret_mean: ', ret_mean, 'ret_std: ', ret_std
#print 'max_sr: ', max_sr, ' max_eigen_index: ', max_eigen_index
weight = v[:,max_eigen_index] / df_ret_std
weight[weight < 0] = 0 # no short selling
weight = weight.astype(float)
weight = weight.fillna(0)
weight = weight / sum(weight)
return weight
pass
start = dt.datetime(2010, 1, 2)
end = dt.datetime(2015, 4, 30)
shortcd_lst = ['A139230',
'A139290',
'A139280',
'A139270',
'A139260',
'A139250',
'A139240',
'A139220',
'A069500',
'A114800'
]
shortcd_lst = [
'A143860',
'A140710',
'A140700',
'A117700',
'A117680',
'A117460',
'A102960',
'A102970',
'A098560',
'A091160',
'A091170',
'A091180',
'A069500',
'A114800'
]
shortcd_lst = read_shortcd('kosdaq100.text')
filedbname = 'kosdaq100.db'
start = '2010-01-01'
end = '2015-05-10'
shortcd_yahoo_lst = [item[1:] + '.KS' for item in shortcd_lst]
# df_stock = web.DataReader(shortcd_yahoo_lst, 'yahoo', start, end)
df_stock = getdbdata(filedbname, shortcd_lst, start, end)
df_stock.to_hdf('df_etf1.h5','table')
df_stock = pd.read_hdf('df_etf1.h5','table')
#df_volume = df_stock['Volume'].copy()
#df_volume_des = df_volume.describe()
#df_volume_mean = df_volume_des.mean()
#df_volume_mean.sort(ascending=False)
#df_liqudity = df_volume_mean[df_volume_mean > 50000]
#selected_ETF = list(df_liqudity.index)
df = df_stock['Close'].copy()
# df = df_stock['Adj Close'].copy()
# df = df[selected_ETF]
df = df.fillna(method='pad')
# df.columns = shortcd_lst
for i in xrange(len(df.columns)):
df[df.columns[i] + '_Ret'] = df[df.columns[i]].pct_change()
df_ret = df[df.columns[len(shortcd_lst):]]
df = df[df.columns[:len(shortcd_lst)]]
df_norm_ret = (df_ret - df_ret.mean()) / (df_ret.std())
emp_corr = df_norm_ret.corr()
w, v = LA.eig(emp_corr)
#eigen_index = 6
eigen_index = w.argmax()
print w[eigen_index]
print v[:,eigen_index]
weight = v[:,eigen_index] / df_ret.std()
weight = weight / sum(weight)
print weight
#------ Test -----------
df_port = df_ret * weight
df_port['Port_Ret'] = df_port.sum(axis=1)
s = df_port['Port_Ret'] + 1
s_cum = s.cumprod()
s_cum.plot()
#------ Apply Fee & Tax --------
df = df[df.columns[:len(shortcd_lst)]]
weight.index = df.columns
df_qty = weight / df
df_qty_diff = df_qty.diff()
df_fee = abs(df_qty_diff) * df * 0.00015
df_fee['fee'] = df_fee.sum(axis=1)
df_qty_diff_negative = df_qty_diff.copy()
df_qty_diff_negative[df_qty_diff_negative > 0] = 0
df_tax = abs(df_qty_diff_negative) * df * 0.0030
df_tax['tax'] = df_tax.sum(axis=1)
s = df_port['Port_Ret'] + 1 - df_fee['fee'] - df_tax['tax']
s_cum = s.cumprod()
s_cum.plot()
#------Sharpe ratio optimize -----------
max_sr = -9999999
max_eigen_index = w.argmin()
for i in xrange(len(w)):
eigen_index = i
weight = v[:,eigen_index] / df_ret.std()
weight = weight / sum(weight)
df_port = df_ret * weight
df_port['Port_Ret'] = df_port.sum(axis=1)
df_port['Port_Ret'] = df_port['Port_Ret'] - df_fee['fee'] - df_tax['tax']
sr = df_port['Port_Ret'].mean() / df_port['Port_Ret'].std()
print 'Sharpe ratio: ', eigen_index ,sr
if max_sr < sr:
max_sr = sr
max_eigen_index = i
print 'max_sr: ', max_sr, ' max_eigen_index: ', max_eigen_index
#-------- optimizing back test ---------
prd = 21
hrz = 20
#df_port = pd.DataFrame(columns=df_ret.columns, index=df_ret.index[prd:len(df_ret)])
#for i in xrange(prd,len(df_ret)):
# df_sample = df_ret[i-prd:i-1]
# weight = runOptPCA(df_sample)
# df_port_new = list(df_ret.iloc[i]) * weight
# df_port.iloc[i-prd] = df_port_new
df_port = pd.DataFrame()
weight_prev = pd.Series()
for i in xrange(prd,len(df_ret),hrz):
df_ret_sample = df_ret[i-prd:i-1]
df_sample = df[i-prd:i-1]
try:
weight = runOptPCA(df_ret_sample)
except Warning as e:
print 'index of prd: ', i
df_port_new = df_ret[i:i+hrz] * weight
df_port_new['Port_Ret'] = df_port_new.sum(axis=1)
weight.index = df.columns
if i == prd:
weight_diff = weight.copy()
else:
weight_diff = weight - weight_prev
weight_fee = abs(weight_diff) * (0.00015 + 0.0010)
weight_negative = weight_diff[weight_diff < 0]
weight_tax = abs(weight_negative) * 0.0030
weight_prev = weight.copy()
df_qty = weight / df
df_qty_diff = df_qty.diff()
df_fee = abs(df_qty_diff) * df * (0.00015 + 0.0010)
df_fee['fee'] = df_fee.sum(axis=1)
df_qty_diff_negative = df_qty_diff.copy()
df_qty_diff_negative[df_qty_diff_negative > 0] = 0
df_tax = abs(df_qty_diff_negative) * df * 0.0030
df_tax['tax'] = df_tax.sum(axis=1)
df_port_new['Port_Ret'] = df_port_new['Port_Ret'] - df_fee['fee'] - df_tax['tax']
df_port_new['Port_Ret'][0] = df_port_new['Port_Ret'][0] - sum(weight_fee) - sum(weight_tax)
if len(df_port) > 0:
df_port = df_port.append(df_port_new)
else:
df_port = df_port_new.copy()
#df_port['Port_Ret'] = df_port.sum(axis=1)
s = df_port['Port_Ret'] + 1
s_cum = s.cumprod()
s_cum.plot()
s_cum_month = s_cum.resample('M')
s_cum_month_pct = s_cum_month.pct_change()
print s_cum_month_pct.mean(), s_cum_month_pct.std()
# s_cum_month_diff.index = s_cum_month_diff.index.to_period('M')
# s_cum_month_diff.plot(kind='bar')
#------- optimzie parm --------
prd_lst = [21, 41, 61, 121, 251]
hrz_lst = [5 ,10, 20]
for prd in prd_lst:
for hrz in hrz_lst:
df_port = pd.DataFrame()
weight_prev = pd.Series()
for i in xrange(prd,len(df_ret),hrz):
df_sample = df_ret[i-prd:i-1]
weight = runOptPCA(df_sample)
df_port_new = df_ret[i:i+hrz] * weight
df_port_new['Port_Ret'] = df_port_new.sum(axis=1)
weight.index = df.columns
if i == prd:
weight_diff = weight.copy()
else:
weight_diff = weight - weight_prev
weight_fee = abs(weight_diff) * (0.00015 + 0.0010)
weight_negative = weight_diff[weight_diff < 0]
weight_tax = abs(weight_negative) * 0.0030
weight_prev = weight.copy()
df_qty = weight / df
df_qty_diff = df_qty.diff()
df_fee = abs(df_qty_diff) * df * (0.00015 + 0.0010)
df_fee['fee'] = df_fee.sum(axis=1)
df_qty_diff_negative = df_qty_diff.copy()
df_qty_diff_negative[df_qty_diff_negative > 0] = 0
df_tax = abs(df_qty_diff_negative) * df * 0.0030
df_tax['tax'] = df_tax.sum(axis=1)
df_port_new['Port_Ret'] = df_port_new['Port_Ret'] - df_fee['fee'] - df_tax['tax']
df_port_new['Port_Ret'][0] = df_port_new['Port_Ret'][0] - sum(weight_fee) - sum(weight_tax)
if len(df_port) > 0:
df_port = df_port.append(df_port_new)
else:
df_port = df_port_new.copy()
#df_port['Port_Ret'] = df_port.sum(axis=1)
s = df_port['Port_Ret'] + 1
sr = df_port['Port_Ret'].mean() / df_port['Port_Ret'].std()
print 'prd: %d, hrz: %d, SR: %.4f' % (prd, hrz, sr)
# -*- coding: utf-8 -*-
"""
Created on Tue May 12 09:34:05 2015
@author: assa
"""
import sqlite3 as lite
import pandas as pd
def getdbdata(filedbname, shortcd_lst, start, end):
conn = lite.connect(filedbname)
data = {}
for shortcd in shortcd_lst:
sqltext = """
SELECT Date, Close
From StockPriceData
Where ShortCD = '%s'
and Date between '%s' and '%s'
""" %(shortcd, start, end)
df_new = pd.read_sql(sqltext, conn)
df_new['Close'] = df_new['Close'].astype(int)
df_new['Date'] = pd.to_datetime(df_new['Date'])
df_new.index = df_new['Date']
del df_new['Date']
data[shortcd] = df_new
panel = pd.Panel(data).swapaxes('items', 'minor')
return panel
pass
if __name__ == '__main__':
from read_shortcd import read_shortcd
filedbname = 'kosdaq100.db'
shortcd_lst = read_shortcd('kosdaq100.text')
panel = getdbdata(filedbname, shortcd_lst, '2010-01-01', '2015-05-10')
print panel['Close']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment