Skip to content

Instantly share code, notes, and snippets.

@ryogrid
Created March 26, 2017 00:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryogrid/3bd0d61e543835d9a9ba0ebdabf16d90 to your computer and use it in GitHub Desktop.
Save ryogrid/3bd0d61e543835d9a9ba0ebdabf16d90 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import numpy as np
import scipy.sparse
import xgboost as xgb
import pickle
import talib as ta
from datetime import datetime as dt
import pytz
# Number of lagged differences taken from each input series per sample.
INPUT_LEN = 3
# Divisor applied to the data length to obtain the training-set size.
TRAINDATA_DIV = 2
# Length of the price window passed to judge_chart_type and used by get_vorarity.
CHART_TYPE_JDG_LEN = 25
# 0->flat 1->upper line 2-> downer line 3->above is top 4->below is top
def judge_chart_type(data_arr):
    """Classify the coarse shape of a price window.

    Return codes:
        0 -- flat or undecided: empty window, all values equal, both
             extremes strictly inside the window, or no rule matched
        1 -- rising: minimum at the first sample, maximum at the last
        2 -- falling: maximum at the first sample, minimum at the last
        3 -- peak: maximum strictly inside the window
        4 -- trough: minimum strictly inside the window

    Ties are resolved in favour of the earliest index.
    """
    if len(data_arr) == 0:
        return 0

    # Start the running maximum at -inf (not 0) so that windows containing
    # only zero or negative values still get a valid max_idx.
    max_val = float("-inf")
    min_val = float("inf")
    max_idx = 0
    min_idx = 0
    for idx, val in enumerate(data_arr):
        if val > max_val:
            max_val = val
            max_idx = idx
        if val < min_val:
            min_val = val
            min_idx = idx

    if max_val == min_val:
        return 0

    last_idx = len(data_arr) - 1
    max_inside = max_idx != 0 and max_idx != last_idx
    min_inside = min_idx != 0 and min_idx != last_idx

    if min_idx == 0 and max_idx == last_idx:
        return 1
    if max_idx == 0 and min_idx == last_idx:
        return 2
    if max_inside and min_inside:
        return 0
    if max_inside:
        return 3
    if min_inside:
        return 4
    return 0
def get_rsi(price_arr, cur_pos, period=40):
    """Return the last ta.RSI value over the period + 1 samples before cur_pos.

    Returns 0 when cur_pos <= period, i.e. when fewer than period + 1
    samples precede cur_pos.
    """
    if cur_pos <= period:
        return 0
    start = cur_pos - (period + 1)
    # NOTE(review): the window is reversed before it is handed to TA-Lib, so
    # price_arr[start] ends up as the last sample -- confirm this ordering
    # is intended.
    window = price_arr[start:cur_pos][::-1]
    return ta.RSI(np.array(window, dtype=float), timeperiod=period)[-1]
def get_ma(price_arr, cur_pos, period=20):
    """Return the last ta.SMA value over the samples preceding cur_pos.

    The window is price_arr[cur_pos - period:cur_pos], clamped to start at
    index 0 when fewer than `period` samples are available.
    """
    start = max(0, cur_pos - period)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = list(reversed(price_arr[start:cur_pos]))
    return ta.SMA(np.array(window, dtype=float), timeperiod=period)[-1]
def get_ma_kairi(price_arr, cur_pos, period=None):
    """Return the percentage deviation of price_arr[cur_pos] from its moving average.

    Computed as (price_arr[cur_pos] - ma) / ma * 100.0, where ma is the
    value returned by get_ma for the same position.

    period: forwarded to get_ma when given; None keeps get_ma's own default.
    """
    if period is None:
        ma = get_ma(price_arr, cur_pos)
    else:
        ma = get_ma(price_arr, cur_pos, period)
    return ((price_arr[cur_pos] - ma) / ma) * 100.0
def get_bb_1(price_arr, cur_pos, period=40):
    """Return the last value of the first array produced by ta.BBANDS.

    TA-Lib documents the return order as (upper, middle, lower), so this is
    the upper band.  The window is price_arr[cur_pos - period:cur_pos],
    clamped to start at index 0.
    """
    start = max(0, cur_pos - period)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = list(reversed(price_arr[start:cur_pos]))
    bands = ta.BBANDS(np.array(window, dtype=float), timeperiod=period)
    return bands[0][-1]
def get_bb_2(price_arr, cur_pos, period=40):
    """Return the last value of the third array produced by ta.BBANDS.

    TA-Lib documents the return order as (upper, middle, lower), so this is
    the lower band.  The window is price_arr[cur_pos - period:cur_pos],
    clamped to start at index 0.
    """
    start = max(0, cur_pos - period)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = list(reversed(price_arr[start:cur_pos]))
    bands = ta.BBANDS(np.array(window, dtype=float), timeperiod=period)
    return bands[2][-1]
def get_ema(price_arr, cur_pos, period=20):
    """Return the last ta.EMA value over the samples preceding cur_pos.

    The window is price_arr[cur_pos - period:cur_pos], clamped to start at
    index 0 when fewer than `period` samples are available.
    """
    start = max(0, cur_pos - period)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = list(reversed(price_arr[start:cur_pos]))
    return ta.EMA(np.array(window, dtype=float), timeperiod=period)[-1]
def get_ema_rsi(price_arr, cur_pos, period=None):
    """Placeholder feature: ignores its arguments and always returns 0."""
    return 0
def get_cci(price_arr, cur_pos, period=None):
    """Placeholder feature: ignores its arguments and always returns 0."""
    return 0
def get_mo(price_arr, cur_pos, period=20):
    """Return the last ta.CMO value over the period + 1 samples before cur_pos.

    Returns 0 when cur_pos <= period + 1.
    """
    if cur_pos <= period + 1:
        return 0
    start = cur_pos - (period + 1)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = price_arr[start:cur_pos][::-1]
    return ta.CMO(np.array(window, dtype=float), timeperiod=period)[-1]
def get_po(price_arr, cur_pos, period=10):
    """Return the last ta.PPO value over the samples preceding cur_pos.

    The window is price_arr[cur_pos - period:cur_pos], clamped to start at
    index 0.
    """
    start = max(0, cur_pos - period)
    window = list(reversed(price_arr[start:cur_pos]))
    # NOTE(review): `period` only sizes the slice; ta.PPO is called with its
    # library defaults, which may need more samples than the window holds --
    # confirm.
    return ta.PPO(np.array(window, dtype=float))[-1]
def get_lw(price_arr, cur_pos, period=None):
    """Placeholder feature: ignores its arguments and always returns 0."""
    return 0
def get_ss(price_arr, cur_pos, period=None):
    """Placeholder feature: ignores its arguments and always returns 0."""
    return 0
def get_dmi(price_arr, cur_pos, period=None):
    """Placeholder feature: ignores its arguments and always returns 0."""
    return 0
def get_vorarity(price_arr, cur_pos, period=None):
    """Return the standard deviation of step-to-step price changes before cur_pos.

    The window is the `period` samples preceding cur_pos (CHART_TYPE_JDG_LEN
    when period is None), clamped so it never starts before index 0.  The
    first sample of the window contributes a change of 0, and np.std is
    taken over the resulting list.

    period: optional window length; None keeps the CHART_TYPE_JDG_LEN default.
    """
    window = CHART_TYPE_JDG_LEN if period is None else period
    # Clamp the start: a negative slice start would wrap around to the end
    # of price_arr and select the wrong samples.
    start = max(0, cur_pos - window)
    prices = price_arr[start:cur_pos]
    # Pairwise differences avoid the old "-1 means no previous value"
    # sentinel, which misfired whenever a price was exactly -1.
    changes = [cur - prev for prev, cur in zip(prices, prices[1:])]
    if len(prices) > 0:
        changes.insert(0, 0)
    return np.std(changes)
def get_macd(price_arr, cur_pos, period=100):
    """Return 1 when the last MACD value is above its signal line, else 0.

    ta.MACD is run with fastperiod=12, slowperiod=26, signalperiod=9 over
    price_arr[cur_pos - period:cur_pos], clamped to start at index 0.
    """
    start = max(0, cur_pos - period)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = list(reversed(price_arr[start:cur_pos]))
    prices = np.array(window, dtype=float)
    macd, macdsignal, _hist = ta.MACD(prices, fastperiod=12, slowperiod=26, signalperiod=9)
    return 1 if macd[-1] > macdsignal[-1] else 0
"""
main
"""
# Script body.  Trains a booster on lagged differences of the two CSV series
# ("funda" model), loads a second booster from ./dji.model ("tec" model,
# fed the technical-indicator feature vector), and reports the hit rate on
# the samples where both models predict the same direction.

# Each CSV row is read as: date, "calm" value, exchange value.
exchange_dates = []
calm_rates = []
calm_rates_diff = []
exchange_rates = []
exchange_rates_diff = []
# Baselines for the very first difference of each series.
prev_calm = 0
prev_exch = 10000
with open('./calm_dji_2009.csv', 'r') as rates_fd:
    for line in rates_fd:
        splited = line.split(",")
        time = splited[0]
        calm_val = float(splited[1])
        exch_val = float(splited[2])
        exchange_dates.append(time)
        calm_rates.append(calm_val)
        calm_rates_diff.append(calm_val - prev_calm)
        exchange_rates.append(exch_val)
        exchange_rates_diff.append(exch_val - prev_exch)
        prev_calm = calm_val
        prev_exch = exch_val

data_len = len(exchange_rates)
# Floor division keeps train_len an int on Python 3 as well.
train_len = data_len // TRAINDATA_DIV
print("data size: " + str(data_len))
print("train len: " + str(train_len))

bst_tec = xgb.Booster({'nthread':4})
bst_tec.load_model("./dji.model")


def _lagged_diffs(pos):
    """Return the INPUT_LEN exchange diffs then the INPUT_LEN calm diffs preceding pos."""
    idxs = [pos - INPUT_LEN + jj for jj in range(INPUT_LEN)]
    return ([exchange_rates_diff[k] for k in idxs] +
            [calm_rates_diff[k] for k in idxs])


### training start
tr_input_mat = []
tr_angle_mat = []
for ii in range(INPUT_LEN, train_len):
    tr_input_mat.append(_lagged_diffs(ii))
    # Label: 1 when the difference at ii + 1 is non-negative, else 0.
    if exchange_rates_diff[ii + 1] >= 0:
        tr_angle_mat.append(1)
    else:
        tr_angle_mat.append(0)
tr_input_arr = np.array(tr_input_mat)
tr_angle_arr = np.array(tr_angle_mat)
dtrain = xgb.DMatrix(tr_input_arr, label=tr_angle_arr)
# 'subsample' was misspelled 'subsumble', which is not an XGBoost parameter
# name, so the intended 0.5 row subsampling was never applied.
param = {'max_depth':6, 'eta':0.2, 'subsample':0.5, 'silent':1, 'objective':'binary:logistic' }
watchlist = [(dtrain,'train')]
num_round = 3000
bst = xgb.train(param, dtrain, num_round, watchlist)
### training end

try_cnt = 0
correct_cnt = 0
last_not_zero_rate = None
# NOTE(review): the range starts INPUT_LEN samples before train_len, so the
# first few evaluated samples overlap the training range -- confirm intent.
for ii in range(train_len - INPUT_LEN, data_len - 1):
    # NOTE(review): the original had a no-op `if exchange_rates[ii + 1] == 0:
    # pass` here; it may have been meant to skip such samples -- confirm.

    # Track the most recent non-zero previous rate used as the denominator
    # of the relative-change feature below.
    if exchange_rates[ii-1] != 0:
        last_not_zero_rate = exchange_rates[ii-1]
    if last_not_zero_rate is None:
        # No usable previous rate yet; the original raised NameError here.
        continue

    # prediction(1)-------------------------------------------
    # The test row must go into ts_input_mat; the original appended it to
    # tr_input_mat and therefore predicted on an empty matrix.
    ts_input_mat = [_lagged_diffs(ii)]
    ts_input_arr = np.array(ts_input_mat)
    dtest = xgb.DMatrix(ts_input_arr)
    pred = bst.predict(dtest)
    predicted_prob_funda = pred[0]
    if predicted_prob_funda >= 0.5:
        predicted_angle_funda = 1
    else:
        predicted_angle_funda = 0
    #--------------------------------------------------------

    # prediction(2)------------------------------------------
    ts_input_mat = []
    ts_input_mat.append(
        [exchange_rates[ii],
         (exchange_rates[ii] - last_not_zero_rate)/last_not_zero_rate,
         get_rsi(exchange_rates, ii),
         get_ma(exchange_rates, ii),
         get_ma_kairi(exchange_rates, ii),
         get_bb_1(exchange_rates, ii),
         get_bb_2(exchange_rates, ii),
         get_ema(exchange_rates, ii),
         get_ema_rsi(exchange_rates, ii),
         get_cci(exchange_rates, ii),
         get_mo(exchange_rates, ii),
         get_lw(exchange_rates, ii),
         get_ss(exchange_rates, ii),
         get_dmi(exchange_rates, ii),
         get_vorarity(exchange_rates, ii),
         get_macd(exchange_rates, ii),
         judge_chart_type(exchange_rates[ii-CHART_TYPE_JDG_LEN:ii])]
    )
    ts_input_arr = np.array(ts_input_mat)
    dtest = xgb.DMatrix(ts_input_arr)
    pred = bst_tec.predict(dtest)
    predicted_prob_tec = pred[0]
    if predicted_prob_tec >= 0.5:
        predicted_angle_tec = 1
    else:
        predicted_angle_tec = 0
    #--------------------------------------------------------

    # Only score samples on which both models agree.
    if predicted_angle_funda == predicted_angle_tec:
        try_cnt += 1
        if exchange_rates_diff[ii+1] >= 0 and predicted_angle_funda == 1:
            correct_cnt += 1
        if exchange_rates_diff[ii+1] < 0 and predicted_angle_funda == 0:
            correct_cnt += 1

if try_cnt:
    print(correct_cnt/float(try_cnt))
else:
    # Avoid ZeroDivisionError when the two models never agree.
    print("no agreeing predictions to score")
#!/usr/bin/python
import numpy as np
import scipy.sparse
import xgboost as xgb
import pickle
import talib as ta
from datetime import datetime as dt
import pytz
# Prediction horizon in samples; also the stride of the training loop.
OUTPUT_LEN = 1
# NOTE(review): not referenced by the code that follows in this script
# (train_len is computed as data_len - 126) -- confirm before removing.
TRAINDATA_DIV = 10
# Length of the price window passed to judge_chart_type and used by get_vorarity.
CHART_TYPE_JDG_LEN = 25
# 0->flat 1->upper line 2-> downer line 3->above is top 4->below is top
def judge_chart_type(data_arr):
    """Classify the coarse shape of a price window.

    Return codes:
        0 -- flat or undecided: empty window, all values equal, both
             extremes strictly inside the window, or no rule matched
        1 -- rising: minimum at the first sample, maximum at the last
        2 -- falling: maximum at the first sample, minimum at the last
        3 -- peak: maximum strictly inside the window
        4 -- trough: minimum strictly inside the window

    Ties are resolved in favour of the earliest index.
    """
    if len(data_arr) == 0:
        return 0

    # Start the running maximum at -inf (not 0) so that windows containing
    # only zero or negative values still get a valid max_idx.
    max_val = float("-inf")
    min_val = float("inf")
    max_idx = 0
    min_idx = 0
    for idx, val in enumerate(data_arr):
        if val > max_val:
            max_val = val
            max_idx = idx
        if val < min_val:
            min_val = val
            min_idx = idx

    if max_val == min_val:
        return 0

    last_idx = len(data_arr) - 1
    max_inside = max_idx != 0 and max_idx != last_idx
    min_inside = min_idx != 0 and min_idx != last_idx

    if min_idx == 0 and max_idx == last_idx:
        return 1
    if max_idx == 0 and min_idx == last_idx:
        return 2
    if max_inside and min_inside:
        return 0
    if max_inside:
        return 3
    if min_inside:
        return 4
    return 0
def get_rsi(price_arr, cur_pos, period=40):
    """Return the last ta.RSI value over the period + 1 samples before cur_pos.

    Returns 0 when cur_pos <= period, i.e. when fewer than period + 1
    samples precede cur_pos.
    """
    if cur_pos <= period:
        return 0
    start = cur_pos - (period + 1)
    # NOTE(review): the window is reversed before it is handed to TA-Lib, so
    # price_arr[start] ends up as the last sample -- confirm this ordering
    # is intended.
    window = price_arr[start:cur_pos][::-1]
    return ta.RSI(np.array(window, dtype=float), timeperiod=period)[-1]
def get_ma(price_arr, cur_pos, period=20):
    """Return the last ta.SMA value over the samples preceding cur_pos.

    The window is price_arr[cur_pos - period:cur_pos], clamped to start at
    index 0 when fewer than `period` samples are available.
    """
    start = max(0, cur_pos - period)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = list(reversed(price_arr[start:cur_pos]))
    return ta.SMA(np.array(window, dtype=float), timeperiod=period)[-1]
def get_ma_kairi(price_arr, cur_pos, period=None):
    """Return the percentage deviation of price_arr[cur_pos] from its moving average.

    Computed as (price_arr[cur_pos] - ma) / ma * 100.0, where ma is the
    value returned by get_ma for the same position.

    period: forwarded to get_ma when given; None keeps get_ma's own default.
    """
    if period is None:
        ma = get_ma(price_arr, cur_pos)
    else:
        ma = get_ma(price_arr, cur_pos, period)
    return ((price_arr[cur_pos] - ma) / ma) * 100.0
def get_bb_1(price_arr, cur_pos, period=40):
    """Return the last value of the first array produced by ta.BBANDS.

    TA-Lib documents the return order as (upper, middle, lower), so this is
    the upper band.  The window is price_arr[cur_pos - period:cur_pos],
    clamped to start at index 0.
    """
    start = max(0, cur_pos - period)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = list(reversed(price_arr[start:cur_pos]))
    bands = ta.BBANDS(np.array(window, dtype=float), timeperiod=period)
    return bands[0][-1]
def get_bb_2(price_arr, cur_pos, period=40):
    """Return the last value of the third array produced by ta.BBANDS.

    TA-Lib documents the return order as (upper, middle, lower), so this is
    the lower band.  The window is price_arr[cur_pos - period:cur_pos],
    clamped to start at index 0.
    """
    start = max(0, cur_pos - period)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = list(reversed(price_arr[start:cur_pos]))
    bands = ta.BBANDS(np.array(window, dtype=float), timeperiod=period)
    return bands[2][-1]
def get_ema(price_arr, cur_pos, period=20):
    """Return the last ta.EMA value over the samples preceding cur_pos.

    The window is price_arr[cur_pos - period:cur_pos], clamped to start at
    index 0 when fewer than `period` samples are available.
    """
    start = max(0, cur_pos - period)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = list(reversed(price_arr[start:cur_pos]))
    return ta.EMA(np.array(window, dtype=float), timeperiod=period)[-1]
def get_ema_rsi(price_arr, cur_pos, period=None):
    """Placeholder feature: ignores its arguments and always returns 0."""
    return 0
def get_cci(price_arr, cur_pos, period=None):
    """Placeholder feature: ignores its arguments and always returns 0."""
    return 0
def get_mo(price_arr, cur_pos, period=20):
    """Return the last ta.CMO value over the period + 1 samples before cur_pos.

    Returns 0 when cur_pos <= period + 1.
    """
    if cur_pos <= period + 1:
        return 0
    start = cur_pos - (period + 1)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = price_arr[start:cur_pos][::-1]
    return ta.CMO(np.array(window, dtype=float), timeperiod=period)[-1]
def get_po(price_arr, cur_pos, period=10):
    """Return the last ta.PPO value over the samples preceding cur_pos.

    The window is price_arr[cur_pos - period:cur_pos], clamped to start at
    index 0.
    """
    start = max(0, cur_pos - period)
    window = list(reversed(price_arr[start:cur_pos]))
    # NOTE(review): `period` only sizes the slice; ta.PPO is called with its
    # library defaults, which may need more samples than the window holds --
    # confirm.
    return ta.PPO(np.array(window, dtype=float))[-1]
def get_lw(price_arr, cur_pos, period=None):
    """Placeholder feature: ignores its arguments and always returns 0."""
    return 0
def get_ss(price_arr, cur_pos, period=None):
    """Placeholder feature: ignores its arguments and always returns 0."""
    return 0
def get_dmi(price_arr, cur_pos, period=None):
    """Placeholder feature: ignores its arguments and always returns 0."""
    return 0
def get_vorarity(price_arr, cur_pos, period=None):
    """Return the standard deviation of step-to-step price changes before cur_pos.

    The window is the `period` samples preceding cur_pos (CHART_TYPE_JDG_LEN
    when period is None), clamped so it never starts before index 0.  The
    first sample of the window contributes a change of 0, and np.std is
    taken over the resulting list.

    period: optional window length; None keeps the CHART_TYPE_JDG_LEN default.
    """
    window = CHART_TYPE_JDG_LEN if period is None else period
    # Clamp the start: a negative slice start would wrap around to the end
    # of price_arr and select the wrong samples.
    start = max(0, cur_pos - window)
    prices = price_arr[start:cur_pos]
    # Pairwise differences avoid the old "-1 means no previous value"
    # sentinel, which misfired whenever a price was exactly -1.
    changes = [cur - prev for prev, cur in zip(prices, prices[1:])]
    if len(prices) > 0:
        changes.insert(0, 0)
    return np.std(changes)
def get_macd(price_arr, cur_pos, period=100):
    """Return 1 when the last MACD value is above its signal line, else 0.

    ta.MACD is run with fastperiod=12, slowperiod=26, signalperiod=9 over
    price_arr[cur_pos - period:cur_pos], clamped to start at index 0.
    """
    start = max(0, cur_pos - period)
    # NOTE(review): the window is reversed before it is handed to TA-Lib --
    # confirm this ordering is intended.
    window = list(reversed(price_arr[start:cur_pos]))
    prices = np.array(window, dtype=float)
    macd, macdsignal, _hist = ta.MACD(prices, fastperiod=12, slowperiod=26, signalperiod=9)
    return 1 if macd[-1] > macdsignal[-1] else 0
def is_weekend(date_str):
    """Return True when date_str falls on a Saturday or Sunday in London.

    date_str is parsed as '%Y-%m-%d %H:%M:%S' after replacing '.' with '-',
    interpreted as Asia/Tokyo local time, and converted to Europe/London
    before the weekday is checked.
    """
    naive = dt.strptime(date_str.replace(".", "-"), '%Y-%m-%d %H:%M:%S')
    tokyo_time = pytz.timezone('Asia/Tokyo').localize(naive)
    london_time = tokyo_time.astimezone(pytz.timezone('Europe/London'))
    # datetime.weekday(): Monday is 0, so 5 and 6 are Saturday and Sunday.
    return london_time.weekday() in (5, 6)
"""
main
"""
# Script body.  Trains a booster on technical-indicator features of the price
# series and of its mirrored copy, saves it to ./dji.model, and reports the
# directional hit rate on the held-out tail of the data.

# Each CSV row is read as: date, closing value.
exchange_dates = []
exchange_rates = []
with open('./dji2000_2009_close.csv', 'r') as rates_fd:
    for line in rates_fd:
        splited = line.split(",")
        time = splited[0]
        val = float(splited[1])
        exchange_dates.append(time)
        exchange_rates.append(val)

# Mirrored series: starts at the same value and applies every step of the
# original with the opposite sign; it is used as extra training data below.
reverse_exchange_rates = []
for idx, rate in enumerate(exchange_rates):
    if idx == 0:
        reverse_exchange_rates.append(rate)
    else:
        diff = rate - exchange_rates[idx - 1]
        reverse_exchange_rates.append(reverse_exchange_rates[-1] - diff)

data_len = len(exchange_rates)
# The last 126 samples are held out for evaluation.
train_len = data_len - 126
print("data size: " + str(data_len))
print("train len: " + str(train_len))


def _build_features(rates, pos):
    """Return the 17-element feature vector for index pos of the series rates."""
    return [rates[pos],
            (rates[pos] - rates[pos - 1])/rates[pos - 1],
            get_rsi(rates, pos),
            get_ma(rates, pos),
            get_ma_kairi(rates, pos),
            get_bb_1(rates, pos),
            get_bb_2(rates, pos),
            get_ema(rates, pos),
            get_ema_rsi(rates, pos),
            get_cci(rates, pos),
            get_mo(rates, pos),
            get_lw(rates, pos),
            get_ss(rates, pos),
            get_dmi(rates, pos),
            get_vorarity(rates, pos),
            get_macd(rates, pos),
            judge_chart_type(rates[pos-CHART_TYPE_JDG_LEN:pos])]


def _direction_label(rates, pos):
    """Return 1 when rates does not fall over the next OUTPUT_LEN samples, else 0."""
    slope = (rates[pos+OUTPUT_LEN] - rates[pos])/float(OUTPUT_LEN)
    if slope >= 0:
        return 1
    return 0


if False:  # manual switch: load a previously saved model
    bst = xgb.Booster({'nthread':4})
    bst.load_model("./dji.model")

if True: ### training start
    tr_input_mat = []
    tr_angle_mat = []
    # Start at 50 so every sample has some history available to the
    # indicator functions.
    for i in range(50, train_len, OUTPUT_LEN):
        # One sample from the real series, one from the mirrored series;
        # labels are appended in the same order.
        tr_input_mat.append(_build_features(exchange_rates, i))
        tr_input_mat.append(_build_features(reverse_exchange_rates, i))
        tr_angle_mat.append(_direction_label(exchange_rates, i))
        tr_angle_mat.append(_direction_label(reverse_exchange_rates, i))
    tr_input_arr = np.array(tr_input_mat)
    tr_angle_arr = np.array(tr_angle_mat)
    dtrain = xgb.DMatrix(tr_input_arr, label=tr_angle_arr)
    # 'subsample' was misspelled 'subsumble', which is not an XGBoost
    # parameter name, so the intended 0.5 row subsampling was never applied.
    param = {'max_depth':6, 'eta':0.2, 'subsample':0.5, 'silent':1, 'objective':'binary:logistic' }
    watchlist = [(dtrain,'train')]
    num_round = 3000
    bst = xgb.train(param, dtrain, num_round, watchlist)
    bst.dump_model('./dump.raw.txt')
    bst.save_model('./dji.model')
### training end

all_cnt = 0
correct_cnt = 0
for window_s in range((data_len - train_len) - (OUTPUT_LEN + 1)):
    current_spot = train_len + window_s + OUTPUT_LEN
    # prediction
    ts_input_mat = [_build_features(exchange_rates, current_spot)]
    ts_input_arr = np.array(ts_input_mat)
    dtest = xgb.DMatrix(ts_input_arr)
    pred = bst.predict(dtest)
    predicted_prob = pred[0]
    all_cnt += 1
    diff = exchange_rates[current_spot + 1] - exchange_rates[current_spot]
    if predicted_prob >= 0.5 and diff >= 0:
        correct_cnt += 1
    elif predicted_prob < 0.5 and diff < 0:
        correct_cnt += 1

if all_cnt:
    print(correct_cnt/float(all_cnt))
else:
    # Avoid ZeroDivisionError when the held-out range is empty.
    print("no samples to evaluate")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment