Created
February 23, 2017 01:07
-
-
Save fanannan/4fa0f15ad061d0554cd22c0199c8d1db to your computer and use it in GitHub Desktop.
vxx intraday / overnight movement
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%matplotlib inline | |
import os | |
import numpy as np | |
import matplotlib as mpl | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import glob | |
from joblib import Memory | |
from pandas_datareader import wb | |
import datetime as dt | |
# joblib cache rooted at /tmp/; applied below through the @memory.cache decorator.
memory = Memory('/tmp/')
# Use a 10-point default font size for every matplotlib figure.
mpl.rcParams.update({'font.size': 10})
# Location handed to conv_all() when the data set is built.
DATA_FILE_PATH = './'
def load(file_path):
    """Read one CSV file into a DataFrame, or return None if it cannot be read.

    Parameters
    ----------
    file_path : path of the CSV file, passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame, or None when the file is missing, unreadable, empty or
    malformed.
    """
    try:
        return pd.read_csv(file_path)
    except (OSError, ValueError):
        # Loading is deliberately best-effort, so a bad file yields None.
        # OSError covers missing/unreadable files; ValueError is the base of
        # pandas' ParserError and EmptyDataError and of UnicodeDecodeError.
        # Unlike a bare ``except``, KeyboardInterrupt and SystemExit propagate.
        return None
def get_file_list(path):
    """Return the files matching ``*.txt*`` inside one or more directories.

    Parameters
    ----------
    path : a single directory (``str`` or ``os.PathLike``) or an iterable of
        directories.

    Returns
    -------
    list of matching file paths, in the order ``glob.glob`` reports them,
    directory by directory.
    """
    # A bare string must be treated as ONE directory: iterating it would
    # yield its individual characters (e.g. './' -> '.' and '/').
    if isinstance(path, (str, os.PathLike)):
        directories = [path]
    else:
        directories = path
    files = []
    for directory in directories:
        files.extend(glob.glob(os.path.join(directory, '*.txt*')))
    return files
@memory.cache
def read_all(path):
    """Load every file found by get_file_list(path).

    Returns a list with one entry per file: whatever load() returned for it
    (a DataFrame, or None when the file could not be read).
    """
    frames = []
    for file_path in get_file_list(path):
        frames.append(load(file_path))
    return frames
def read_date(s):
    """Parse a 'YYYY-MM-DD' string and return it as a datetime.date."""
    return dt.datetime.strptime(s, '%Y-%m-%d').date()
def to_weekday(s):
    """Return the weekday number (Monday == 0) of a 'YYYY-MM-DD' string."""
    return read_date(s).weekday()
def find_business_day(s, lis):
    """Return the date string of the day following ``s`` if it is in ``lis``.

    The following day is the next calendar day, except that a Friday
    (weekday 4) jumps three days ahead.  The candidate is formatted as
    'YYYY-MM-DD'; None is returned when ``lis`` does not contain it.
    """
    current = read_date(s)
    step = 3 if current.weekday() == 4 else 1
    candidate = (current + dt.timedelta(days=step)).strftime('%Y-%m-%d')
    if candidate in lis:
        return candidate
    return None
#@memory.cache | |
def conv_all(path):
    """Build one row of intraday and overnight log returns per trading day.

    Every frame returned by ``read_all(path)`` is expected to provide the
    columns 'Date', 'Time', 'Open' and 'Close'.  For each distinct 'Date':

    * 'p0930' is the 'Open' of the bar stamped '14:35:00' (summer time) or
      '15:35:00' (otherwise);
    * for each hour HH in 10..16 the 'Close' of the bar stamped
      ``HH + shift`` o'clock is taken, with shift 5 in summer time and 6
      otherwise;
    * 'p3330' is the opening price of the following business day, as found
      by ``find_business_day``.

    Prices are then replaced by log returns: ``cXXXX`` relative to 'p0930'
    and ``rXXXX`` relative to the previous sampling point.

    NOTE(review): the time stamps appear to be in a zone 5 h (summer) / 6 h
    (winter) ahead of the exchange clock -- confirm against the data source.

    Returns
    -------
    pandas.DataFrame indexed by date with the columns 'p0930',
    'c1000'/'r1000' ... 'c1600'/'r1600' and 'c3330'/'r3330'.  Days with any
    missing value are dropped, as are duplicated rows.

    Raises
    ------
    ValueError
        If no day yields a complete row.
    """
    time_list = [10, 11, 12, 13, 14, 15, 16]
    labels = [str(hour) + '00' for hour in time_list] + ['3330']
    daily_frames = []

    for df in read_all(path):
        if df is None:
            # Files that could not be read are represented by None.
            continue
        # Built once per file so each next-day lookup is an O(1) set test.
        known_dates = set(df['Date'].values)
        for date in known_dates:
            dfd = df[df['Date'] == date]
            next_date = find_business_day(date, known_dates)

            # Opening price (beware of daylight-saving time).  The flag must
            # be derived from the 'Time' values; testing the column labels
            # can never match a time stamp.
            flag_summer = (dfd['Time'] == '14:35:00').any()
            open_time = '14:35:00' if flag_summer else '15:35:00'
            hour_shift = (14 - 9) if flag_summer else (15 - 9)

            df_open = dfd[dfd['Time'] == open_time]
            pieces = [
                df_open[['Date', 'Open']]
                .set_index('Date')
                .rename(columns={'Open': 'p0930'})
            ]

            # Rename the columns of the per-time data.
            for hour in time_list:
                rtime = str(hour + hour_shift) + ':00:00'
                ptime = 'p' + str(hour) + '00'
                pieces.append(
                    dfd[dfd['Time'] == rtime][['Date', 'Close']]
                    .set_index('Date')
                    .rename(columns={'Close': ptime})
                )

            # Next business day's open.  The boundary between summer and
            # winter time is ignored: today's open_time is reused.
            next_open = np.nan
            if next_date is not None:
                dfn = df[df['Date'] == next_date]
                next_open_rows = dfn[dfn['Time'] == open_time]['Open']
                if len(next_open_rows) > 0:
                    next_open = next_open_rows.values[0]
            pieces.append(
                pd.DataFrame([next_open], index=[date], columns=['p3330'])
            )

            df_prices = pd.concat(pieces, axis=1)

            # Rate of change: each price column is replaced by its log
            # return versus the open (c...) and the previous point (r...).
            open_price = df_prices['p0930']
            last_price = open_price
            for label in labels:
                price = df_prices['p' + label]
                df_prices['c' + label] = (price / open_price).apply(np.log)
                df_prices['r' + label] = (price / last_price).apply(np.log)
                del df_prices['p' + label]
                last_price = price

            df_prices = df_prices.dropna()
            if len(df_prices.index) > 0:
                daily_frames.append(df_prices)

    if not daily_frames:
        # pd.concat([]) would raise a ValueError with an opaque message.
        raise ValueError(
            'no complete trading day found under {!r}'.format(path)
        )
    return pd.concat(daily_frames, axis=0).drop_duplicates()
# Build the per-day return table once; the explain*() calls below read from it.
df_data = conv_all(DATA_FILE_PATH)
# Percentiles requested from DataFrame.describe() and plotted by draw().
PERCENTILES = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
def get_cumulative(df):
    """Summarise the cumulative-return columns ('c1000' ... 'c1600', 'c3330').

    Returns the ``describe`` table of ``df`` (with the PERCENTILES rows)
    restricted to those columns.
    """
    summary = df.describe(percentiles=PERCENTILES)
    columns = ['c{}00'.format(hour) for hour in range(10, 17)] + ['c3330']
    return summary[columns]
def get_relative(df):
    """Summarise the step-by-step return columns ('r1000' ... 'r1600', 'r3330').

    Returns the ``describe`` table of ``df`` (with the PERCENTILES rows)
    restricted to those columns.
    """
    summary = df.describe(percentiles=PERCENTILES)
    columns = ['r{}00'.format(hour) for hour in range(10, 17)] + ['r3330']
    return summary[columns]
def explain(df_data, opt=""):
    """Plot the cumulative and relative return summaries side by side.

    ``opt`` is a prefix placed in front of each subplot title.  The left
    panel shows the cumulative summary with y limits of +/-0.05, the right
    panel the relative summary with y limits of +/-0.02.
    """
    # Both summaries are computed before the figure is created.
    panels = (
        (get_cumulative(df_data), 'cumulative', (-0.05, 0.05)),
        (get_relative(df_data), 'relative', (-0.02, 0.02)),
    )
    fig = plt.figure(figsize=(12, 4))
    for position, (summary, title, y_range) in enumerate(panels, start=1):
        axes = fig.add_subplot(1, 2, position)
        axes.set_ylim(y_range)
        draw(axes, summary, title, opt)
def draw(ax, df_summary, title, opt):
    """Plot the mean and percentile rows of a ``describe`` table on ``ax``.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    df_summary : ``describe`` output whose column labels are one letter
        followed by digits (e.g. 'c1000'); the digits become the x values.
    title : 'relative' or anything else.  For anything other than
        'relative' a starting point (x=930, y=0) is prepended to every line.
    opt : prefix for the axes title.

    The mean is drawn with a thick line, followed by one line per entry of
    PERCENTILES.
    """
    # Rename on a copy so the caller's frame keeps its original labels.
    summary = df_summary.rename(columns=lambda label: int(label[1:]))

    ax.set_title(opt + title + ' movement')
    ax.grid()

    if title != 'relative':
        x_prefix, y_prefix = [930], [0]
    else:
        x_prefix, y_prefix = [], []

    # ``.ix`` was removed from pandas; ``.loc`` is the label-based equivalent.
    x = x_prefix + summary.columns.tolist()
    ax.plot(x, y_prefix + summary.loc['mean', :].values.tolist(), linewidth=4)
    for p in PERCENTILES:
        # round() guards against float products such as 28.999... that
        # int() alone would truncate to the wrong row label.
        row_label = '{}%'.format(int(round(p * 100)))
        ax.plot(x, y_prefix + summary.loc[row_label, :].values.tolist())
# Plot the summaries for the complete data set.
explain(df_data)
def explain_by_weekday(df_data):
    """Plot the return summaries separately for each weekday, Monday-Friday.

    ``df_data`` must be indexed by 'YYYY-MM-DD' strings.  For every weekday
    the weekday number and the number of matching rows are printed before
    the plots are drawn.
    """
    # Spelled out in full: appending 'day' to three-letter stems produced
    # titles such as 'tueday' and 'thrday'.
    weekday_names = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday']
    df_wd = df_data.copy()
    df_wd['weekday'] = [to_weekday(t) for t in df_wd.index]
    for d, name in enumerate(weekday_names):
        df = df_wd[df_wd['weekday'] == d]
        print(d, len(df))
        explain(df, name + ' / ')
# Plot the summaries once per weekday.
explain_by_weekday(df_data)
def to_day(s):
    """Return the day of the month (1-31) of a 'YYYY-MM-DD' string."""
    return dt.datetime.strptime(s, '%Y-%m-%d').day
def explain_by_day(df_data):
    """Plot the return summaries for the early, middle and late part of the month.

    ``df_data`` must be indexed by 'YYYY-MM-DD' strings.  Early means day
    <= 10, mid means 10 < day < 20, and late means day >= 20.
    """
    frame = df_data.copy()
    frame['day'] = [to_day(t) for t in frame.index]
    day = frame['day']
    explain(frame[day <= 10], 'early month / ')
    explain(frame[(day > 10) & (day < 20)], 'mid month / ')
    explain(frame[day >= 20], 'late month / ')
# Plot the summaries for the early, middle and late part of the month.
explain_by_day(df_data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment