Skip to content

Instantly share code, notes, and snippets.

@fanannan
Created February 23, 2017 01:07
Show Gist options
  • Save fanannan/4fa0f15ad061d0554cd22c0199c8d1db to your computer and use it in GitHub Desktop.
Save fanannan/4fa0f15ad061d0554cd22c0199c8d1db to your computer and use it in GitHub Desktop.
vxx intraday / overnight movement
%matplotlib inline
import os
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import glob
from joblib import Memory
from pandas_datareader import wb
import datetime as dt
# joblib cache object behind the @memory.cache decorator used below;
# '/tmp/' is the location handed to joblib for its cache files.
memory = Memory('/tmp/')
# Use a 10pt font in every matplotlib figure.
mpl.rcParams.update({'font.size': 10})
# Location passed to conv_all() to look for the price files.
DATA_FILE_PATH = './'
def load(file_path):
    """Read one CSV price file into a DataFrame (best effort).

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the file is
        missing, unreadable, empty or malformed, so callers can skip it.
    """
    try:
        return pd.read_csv(file_path)
    except (OSError, ValueError):
        # OSError covers missing/unreadable files; pandas' EmptyDataError and
        # ParserError as well as UnicodeDecodeError all derive from ValueError.
        # Unlike a bare ``except``, KeyboardInterrupt/SystemExit still propagate.
        return None
def get_file_list(path):
    """Collect the data files (``*.txt*``) found in one or more directories.

    Args:
        path: A single directory (string or path-like object) or an iterable
            of directories.

    Returns:
        A list of matching file paths, grouped by directory in the order the
        directories were given.
    """
    # A bare string is ONE directory. Iterating over it would glob each of its
    # characters as a directory ('./' -> '.' and '/', i.e. the filesystem root).
    if isinstance(path, str) or hasattr(path, '__fspath__'):
        directories = [path]
    else:
        directories = path
    files = []
    for directory in directories:
        files.extend(glob.glob(os.path.join(directory, '*.txt*')))
    return files
@memory.cache
def read_all(path):
    """Load every data file found under ``path``.

    Results are memoised through ``memory.cache``.

    Returns:
        A list with one entry per file, in the order produced by
        ``get_file_list``; each entry is whatever ``load`` returned for it.
    """
    frames = []
    for file_path in get_file_list(path):
        frames.append(load(file_path))
    return frames
def read_date(s):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``."""
    parsed = dt.datetime.strptime(s, '%Y-%m-%d')
    return parsed.date()
def to_weekday(s):
    """Return the weekday (Monday=0 ... Sunday=6) of a ``YYYY-MM-DD`` string."""
    return read_date(s).weekday()
def find_business_day(s, lis):
    """Return the following weekday's date string if ``lis`` contains it.

    The only candidate considered is the next calendar day, or the Monday
    three days later when ``s`` is a Friday; no further days are searched.

    Args:
        s: Date as a ``YYYY-MM-DD`` string.
        lis: Container of known date strings, tested with ``in``.

    Returns:
        The candidate date as a ``YYYY-MM-DD`` string, or ``None`` when it is
        not in ``lis``.
    """
    today = read_date(s)
    gap = 3 if today.weekday() == 4 else 1  # weekday 4 is Friday
    candidate = (today + dt.timedelta(days=gap)).strftime('%Y-%m-%d')
    if candidate in lis:
        return candidate
    return None
def _session_open_time(day_rows):
    """Return the 'Time' label of the bar used as the session open for one day."""
    # A '14:35:00' bar is taken as the sign of a summer-time session; otherwise
    # the open is read from the '15:35:00' bar. The test must look at the Time
    # VALUES: testing the column names can never match.
    # NOTE(review): assumes no '14:35:00' bar exists on winter-time days - confirm.
    if (day_rows['Time'] == '14:35:00').any():
        return '14:35:00'
    return '15:35:00'


def _first_price(day_rows, bar_time, column):
    """Return ``column`` of the first row at ``bar_time``, or NaN if there is none."""
    matches = day_rows.loc[day_rows['Time'] == bar_time, column]
    return matches.iloc[0] if len(matches) > 0 else np.nan


#@memory.cache
def conv_all(path):
    """Build one row of log returns per trading day from the intraday files.

    For every date in every readable file the following prices are collected:
    the ``Open`` of the session's opening bar (``p0930``), the ``Close`` of
    seven hourly bars (labelled ``1000`` ... ``1600``) and the ``Open`` of the
    opening bar on the following business day (labelled ``3330``). Hourly bar
    times are shifted by the same offset as the opening bar, so summer-time
    and winter-time sessions map onto the same labels.

    Args:
        path: Forwarded unchanged to ``read_all``.

    Returns:
        A DataFrame indexed by date string with the columns ``p0930`` and,
        for each label, ``c<label>`` = log(price / opening price) and
        ``r<label>`` = log(price / previous checkpoint price). Days with any
        missing value are dropped, as are rows whose values duplicate an
        earlier row.

    Raises:
        ValueError: If no day with a complete set of prices was found.
    """
    time_list = [10, 11, 12, 13, 14, 15, 16]
    labels = ['%d00' % hour for hour in time_list]
    dates = []
    records = []
    for df in read_all(path):
        if df is None:
            continue
        # Split the file by date once instead of re-filtering the whole frame
        # for every date; sort=False keeps dates in order of appearance.
        by_date = {date: rows for date, rows in df.groupby('Date', sort=False)}
        for date, dfd in by_date.items():
            # Opening price (the bar time depends on daylight-saving time).
            open_time = _session_open_time(dfd)
            hour_shift = int(open_time[:2]) - 9
            prices = {'p0930': _first_price(dfd, open_time, 'Open')}
            # Hourly closes, stored under labels that do not depend on the shift.
            for hour, label in zip(time_list, labels):
                bar_time = '%d:00:00' % (hour + hour_shift)
                prices['p' + label] = _first_price(dfd, bar_time, 'Close')
            # Next session's open. Its bar time is detected on that day's own
            # rows, so a summer/winter switch between the two days is handled.
            next_date = find_business_day(date, by_date)
            if next_date is None:
                prices['p3330'] = np.nan
            else:
                dfn = by_date[next_date]
                prices['p3330'] = _first_price(dfn, _session_open_time(dfn), 'Open')
            dates.append(date)
            records.append(prices)
    if not records:
        raise ValueError('conv_all: no price data found under %r' % (path,))

    price_columns = ['p0930'] + ['p' + label for label in labels] + ['p3330']
    prices = pd.DataFrame(records, index=dates, columns=price_columns)

    # Rates of change: cumulative from the open and relative to the previous
    # checkpoint (the overnight step is relative to the last hourly close).
    open_price = prices['p0930']
    result = prices[['p0930']].copy()
    last_price = open_price
    for label in labels + ['3330']:
        price = prices['p' + label]
        result['c' + label] = np.log(price / open_price)
        result['r' + label] = np.log(price / last_price)
        last_price = price

    result = result.dropna().drop_duplicates()
    if result.empty:
        raise ValueError('conv_all: no complete trading day found under %r' % (path,))
    return result
# Build the per-day return table once; the explain*() calls below all read it.
df_data = conv_all(DATA_FILE_PATH)
# Percentiles requested from DataFrame.describe() and plotted as separate
# lines by draw().
PERCENTILES = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
def get_cumulative(df):
    """Return ``describe()`` statistics for the cumulative (``c...``) columns."""
    columns = ['c1000', 'c1100', 'c1200', 'c1300', 'c1400', 'c1500', 'c1600', 'c3330']
    summary = df.describe(percentiles=PERCENTILES)
    return summary[columns]
def get_relative(df):
    """Return ``describe()`` statistics for the step-by-step (``r...``) columns."""
    columns = ['r1000', 'r1100', 'r1200', 'r1300', 'r1400', 'r1500', 'r1600', 'r3330']
    summary = df.describe(percentiles=PERCENTILES)
    return summary[columns]
def explain(df_data, opt=""):
    """Draw the cumulative and relative summaries of ``df_data`` side by side.

    Args:
        df_data: Return table holding the ``c...`` and ``r...`` columns.
        opt: Text prepended to both panel titles.
    """
    # Both summaries are computed before the figure is created.
    panels = (
        ('cumulative', get_cumulative(df_data), 0.05),
        ('relative', get_relative(df_data), 0.02),
    )
    fig = plt.figure(figsize=(12, 4))
    for position, (title, summary, limit) in enumerate(panels, start=1):
        axes = fig.add_subplot(1, 2, position)
        # The y-range is fixed before drawing: +/-0.05 left, +/-0.02 right.
        axes.set_ylim((-limit, limit))
        draw(axes, summary, title, opt)
def draw(ax, df_summary, title, opt):
    """Plot the mean and the percentile rows of a summary table on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        df_summary: ``DataFrame.describe`` output whose column names are one
            prefix character followed by a numeric label (e.g. ``c1000``).
            It is not modified.
        title: Kind of series. Any value other than ``'relative'`` makes every
            line start from an extra point at x=930, y=0.
        opt: Text prepended to the axes title.
    """
    ax.set_title(opt+title+' movement')
    ax.grid()
    # x positions come from the numeric part of the column names; they are
    # read here rather than renaming the caller's columns in place.
    x_labels = [int(c[1:]) for c in df_summary.columns]
    if title != 'relative':
        x_prefix, y_prefix = [930], [0]
    else:
        x_prefix, y_prefix = [], []
    xs = x_prefix + x_labels
    # .loc replaces the .ix indexer, which was removed in pandas 1.0.
    ax.plot(xs, y_prefix + df_summary.loc['mean', :].values.tolist(), linewidth=4)
    for p in PERCENTILES:
        # Round before formatting: truncating p*100 can land one below the
        # intended label when the product is not exactly representable.
        row = '{}%'.format(int(round(p * 100)))
        ax.plot(xs, y_prefix + df_summary.loc[row, :].values.tolist())
# Chart the whole sample.
explain(df_data)
def explain_by_weekday(df_data):
    """Chart the return profiles separately for each weekday, Monday to Friday.

    Prints the weekday number and its row count before drawing each chart.

    Args:
        df_data: Return table indexed by ``YYYY-MM-DD`` date strings.
    """
    df_wd = df_data.copy()
    df_wd['weekday'] = [to_weekday(t) for t in df_wd.index]
    # Spelled out in full: appending 'day' to a three-letter abbreviation
    # produced "tueday", "wedday" and "thrday" in the chart titles.
    day_names = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday')
    for d, day_name in enumerate(day_names):
        df = df_wd[df_wd['weekday'] == d]
        print(d, len(df))
        explain(df, day_name + ' / ')
# Chart the sample split by weekday.
explain_by_weekday(df_data)
def to_day(s):
    """Return the day of the month of a ``YYYY-MM-DD`` date string."""
    return dt.datetime.strptime(s, '%Y-%m-%d').day
def explain_by_day(df_data):
    """Chart the return profiles for the early, middle and late part of the month.

    Days 1-10 count as early, 11-19 as mid and 20 onwards as late.

    Args:
        df_data: Return table indexed by ``YYYY-MM-DD`` date strings.
    """
    tagged = df_data.copy()
    tagged['day'] = [to_day(label) for label in tagged.index]
    day = tagged['day']
    explain(tagged[day <= 10], 'early month / ')
    explain(tagged[(day > 10) & (day < 20)], 'mid month / ')
    explain(tagged[day >= 20], 'late month / ')
# Chart the sample split by part of the month.
explain_by_day(df_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment