Created
March 26, 2017 00:50
-
-
Save ryogrid/3bd0d61e543835d9a9ba0ebdabf16d90 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import numpy as np | |
import scipy.sparse | |
import xgboost as xgb | |
import pickle | |
import talib as ta | |
from datetime import datetime as dt | |
import pytz | |
INPUT_LEN = 3 | |
TRAINDATA_DIV = 2 | |
CHART_TYPE_JDG_LEN = 25 | |
# Chart type codes: 0 -> flat, 1 -> rising line, 2 -> falling line, 3 -> peak inside the window, 4 -> trough inside the window
def judge_chart_type(data_arr):
    """Classify a window of values by where its extremes are located.

    Only the index of the first maximum and of the first minimum are used.

    Args:
        data_arr: sequence of numbers supporting iteration and ``len()``.

    Returns:
        0 -- flat or empty window, or both extremes strictly inside it
        1 -- minimum at the first element and maximum at the last
        2 -- maximum at the first element and minimum at the last
        3 -- only the maximum lies strictly inside the window
        4 -- only the minimum lies strictly inside the window
    """
    # Seed the maximum with -inf (the original used 0, which left the
    # max index unbound for windows without a positive value).
    max_val = float("-inf")
    min_val = float("inf")
    max_idx = None
    min_idx = None
    # Strict comparisons keep the first occurrence when values tie.
    for idx, value in enumerate(data_arr):
        if value > max_val:
            max_val = value
            max_idx = idx
        if value < min_val:
            min_val = value
            min_idx = idx

    # Empty window (or nothing comparable): treat as flat instead of crashing.
    if max_idx is None or min_idx is None:
        return 0
    if max_val == min_val:
        return 0

    last_idx = len(data_arr) - 1
    max_inside = max_idx not in (0, last_idx)
    min_inside = min_idx not in (0, last_idx)

    if min_idx == 0 and max_idx == last_idx:
        return 1
    if max_idx == 0 and min_idx == last_idx:
        return 2
    if max_inside and min_inside:
        return 0
    if max_inside:
        return 3
    if min_inside:
        return 4
    return 0
def get_rsi(price_arr, cur_pos, period=40):
    """Return the last ``ta.RSI`` value for the samples just before ``cur_pos``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: RSI period; the window holds ``period + 1`` samples.

    Returns:
        0 when ``cur_pos <= period`` (not enough history), otherwise the
        final element of the array returned by ``ta.RSI``.
    """
    if cur_pos <= period:
        return 0

    window_start = cur_pos - (period + 1)
    # The window is handed to TA-Lib in reversed order.
    # NOTE(review): confirm that newest-first ordering is intended.
    reversed_window = price_arr[window_start:cur_pos][::-1]
    samples = np.array(reversed_window, dtype=float)
    return ta.RSI(samples, timeperiod=period)[-1]
def get_ma(price_arr, cur_pos, period=20):
    """Return the last ``ta.SMA`` value for the samples just before ``cur_pos``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: SMA period and maximum window length.

    Returns:
        The final element of the array returned by ``ta.SMA``. When fewer
        than ``period`` samples precede ``cur_pos`` the window is simply
        shorter (it starts at index 0).
    """
    window_start = max(cur_pos - period, 0)
    # The window is passed to TA-Lib in reversed order.
    reversed_window = price_arr[window_start:cur_pos][::-1]
    samples = np.array(reversed_window, dtype=float)
    return ta.SMA(samples, timeperiod=period)[-1]
def get_ma_kairi(price_arr, cur_pos, period=None):
    """Return how far the price at ``cur_pos`` deviates from its moving average.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample.
        period: moving-average period forwarded to ``get_ma``; ``None``
            (the default) keeps ``get_ma``'s own default period.

    Returns:
        ``(price - ma) / ma * 100.0`` where ``ma`` comes from ``get_ma``.
    """
    # Previously ``period`` was accepted but never used; forward it when given.
    if period is None:
        ma = get_ma(price_arr, cur_pos)
    else:
        ma = get_ma(price_arr, cur_pos, period)
    return ((price_arr[cur_pos] - ma) / ma) * 100.0
def get_bb_1(price_arr, cur_pos, period=40):
    """Return the last value of the first band produced by ``ta.BBANDS``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: Bollinger period and maximum window length.

    Returns:
        Final element of output 0 of ``ta.BBANDS`` (the upper band according
        to the TA-Lib documentation).
    """
    window_start = cur_pos - period if cur_pos > period else 0
    # The window is passed to TA-Lib in reversed order.
    reversed_window = list(reversed(price_arr[window_start:cur_pos]))
    bands = ta.BBANDS(np.array(reversed_window, dtype=float), timeperiod=period)
    return bands[0][-1]
def get_bb_2(price_arr, cur_pos, period=40):
    """Return the last value of the third band produced by ``ta.BBANDS``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: Bollinger period and maximum window length.

    Returns:
        Final element of output 2 of ``ta.BBANDS`` (the lower band according
        to the TA-Lib documentation).
    """
    window_start = cur_pos - period if cur_pos > period else 0
    # The window is passed to TA-Lib in reversed order.
    reversed_window = list(reversed(price_arr[window_start:cur_pos]))
    bands = ta.BBANDS(np.array(reversed_window, dtype=float), timeperiod=period)
    return bands[2][-1]
def get_ema(price_arr, cur_pos, period=20):
    """Return the last ``ta.EMA`` value for the samples just before ``cur_pos``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: EMA period and maximum window length.

    Returns:
        The final element of the array returned by ``ta.EMA``.
    """
    window_start = max(cur_pos - period, 0)
    # The window is passed to TA-Lib in reversed order.
    reversed_window = price_arr[window_start:cur_pos][::-1]
    samples = np.array(reversed_window, dtype=float)
    return ta.EMA(samples, timeperiod=period)[-1]
def get_ema_rsi(price_arr, cur_pos, period=None):
    """Unimplemented indicator stub: ignores its arguments and returns 0.

    The value is still placed in the feature vectors built by the script,
    so that column is constant.
    """
    return 0
def get_cci(price_arr, cur_pos, period=None):
    """Unimplemented indicator stub: ignores its arguments and returns 0.

    The value is still placed in the feature vectors built by the script,
    so that column is constant.
    """
    return 0
def get_mo(price_arr, cur_pos, period=20):
    """Return the last ``ta.CMO`` value for the samples just before ``cur_pos``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: CMO period; the window holds ``period + 1`` samples.

    Returns:
        0 when ``cur_pos <= period + 1`` (not enough history), otherwise the
        final element of the array returned by ``ta.CMO``.
    """
    span = period + 1
    if cur_pos <= span:
        return 0

    # The window is passed to TA-Lib in reversed order.
    reversed_window = price_arr[cur_pos - span:cur_pos][::-1]
    samples = np.array(reversed_window, dtype=float)
    return ta.CMO(samples, timeperiod=period)[-1]
def get_po(price_arr, cur_pos, period=10):
    """Return the last ``ta.PPO`` value for the samples just before ``cur_pos``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: maximum window length (only used for slicing).

    Returns:
        The final element of the array returned by ``ta.PPO``.
    """
    window_start = 0 if cur_pos <= period else cur_pos - period
    # The window is passed to TA-Lib in reversed order.
    reversed_window = list(reversed(price_arr[window_start:cur_pos]))
    samples = np.array(reversed_window, dtype=float)
    # NOTE(review): ``period`` is not forwarded, so ta.PPO runs with its own
    # default periods; with a 10-sample window the result is presumably NaN
    # -- confirm against the TA-Lib defaults.
    return ta.PPO(samples)[-1]
def get_lw(price_arr, cur_pos, period=None):
    """Unimplemented indicator stub: ignores its arguments and returns 0.

    The value is still placed in the feature vectors built by the script,
    so that column is constant.
    """
    return 0
def get_ss(price_arr, cur_pos, period=None):
    """Unimplemented indicator stub: ignores its arguments and returns 0.

    The value is still placed in the feature vectors built by the script,
    so that column is constant.
    """
    return 0
def get_dmi(price_arr, cur_pos, period=None):
    """Unimplemented indicator stub: ignores its arguments and returns 0.

    The value is still placed in the feature vectors built by the script,
    so that column is constant.
    """
    return 0
def get_vorarity(price_arr, cur_pos, period=None):
    """Return the standard deviation of step-to-step changes before ``cur_pos``.

    The window is the ``CHART_TYPE_JDG_LEN`` samples preceding ``cur_pos``
    (``cur_pos`` itself excluded). The first sample contributes a change
    of 0. ``period`` is accepted but not used.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample.
        period: unused.

    Returns:
        ``np.std`` of the list of changes.
    """
    window = price_arr[cur_pos - CHART_TYPE_JDG_LEN:cur_pos]
    changes = []
    previous = -1  # sentinel meaning "no previous sample yet"
    for current in window:
        changes.append(0 if previous == -1 else current - previous)
        previous = current
    return np.std(changes)
def get_macd(price_arr, cur_pos, period=100):
    """Return 1 when the last MACD value is above its signal line, else 0.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: maximum window length handed to ``ta.MACD``.

    Returns:
        1 if ``macd[-1] > macdsignal[-1]`` for ``ta.MACD`` run with
        fast/slow/signal periods 12/26/9, otherwise 0.
    """
    window_start = max(cur_pos - period, 0)
    # The window is passed to TA-Lib in reversed order.
    reversed_window = price_arr[window_start:cur_pos][::-1]
    samples = np.array(reversed_window, dtype=float)
    macd_line, signal_line, _ = ta.MACD(
        samples, fastperiod=12, slowperiod=26, signalperiod=9)
    return 1 if macd_line[-1] > signal_line[-1] else 0
# ---------------------------------------------------------------------------
# main
#
# 1. Load ./calm_dji_2009.csv and build row-to-row differences.
# 2. Train a booster on lagged differences of both value columns.
# 3. Load a second, pre-trained booster from ./dji.model that is fed the
#    technical-indicator feature vector.
# 4. On the later rows, count how often the sign of an upcoming difference
#    is predicted correctly, considering only rows where both boosters agree.
# ---------------------------------------------------------------------------
exchange_dates = []
calm_rates = []
calm_rates_diff = []
exchange_rates = []
exchange_rates_diff = []
# Seeds for the very first difference of each column.
prev_calm = 0
prev_exch = 10000

# Rows are read as: timestamp, calm value, exchange value.
# The file is opened in a ``with`` block so the handle is always closed.
with open('./calm_dji_2009.csv', 'r') as rates_fd:
    for line in rates_fd:
        splited = line.split(",")
        time = splited[0]
        calm_val = float(splited[1])
        exch_val = float(splited[2])
        exchange_dates.append(time)
        calm_rates.append(calm_val)
        calm_rates_diff.append(calm_val - prev_calm)
        exchange_rates.append(exch_val)
        exchange_rates_diff.append(exch_val - prev_exch)
        prev_calm = calm_val
        prev_exch = exch_val

data_len = len(exchange_rates)
# Floor division keeps train_len an int on Python 3 as well as Python 2.
train_len = data_len // TRAINDATA_DIV

print("data size: " + str(data_len))
print("train len: " + str(train_len))

bst_tec = xgb.Booster({'nthread': 4})
bst_tec.load_model("./dji.model")


def _lagged_diffs(pos):
    """Return the INPUT_LEN differences before pos: exchange first, then calm."""
    lags = range(pos - INPUT_LEN, pos)
    return ([exchange_rates_diff[k] for k in lags] +
            [calm_rates_diff[k] for k in lags])


if True:  ### training start
    tr_input_mat = []
    tr_angle_mat = []
    for ii in range(INPUT_LEN, train_len):
        tr_input_mat.append(_lagged_diffs(ii))
        # Label: 1 when the difference at ii + 1 is non-negative.
        if exchange_rates_diff[ii + 1] >= 0:
            tr_angle_mat.append(1)
        else:
            tr_angle_mat.append(0)

    tr_input_arr = np.array(tr_input_mat)
    tr_angle_arr = np.array(tr_angle_mat)
    dtrain = xgb.DMatrix(tr_input_arr, label=tr_angle_arr)
    # 'subsample' was misspelled 'subsumble', which is not an XGBoost
    # parameter, so the 0.5 row-subsampling setting never took effect.
    param = {'max_depth': 6, 'eta': 0.2, 'subsample': 0.5, 'silent': 1,
             'objective': 'binary:logistic'}
    watchlist = [(dtrain, 'train')]
    num_round = 3000
    bst = xgb.train(param, dtrain, num_round, watchlist)

try_cnt = 0
correct_cnt = 0
# NOTE(review): evaluation starts INPUT_LEN rows before train_len, so the
# first few evaluated rows were also used for training -- confirm intent.
for ii in range(train_len - INPUT_LEN, data_len - 1):
    # NOTE(review): this check has no effect; it may have been meant to
    # skip rows whose next rate is 0 (``continue``) -- confirm.
    if exchange_rates[ii + 1] == 0:
        pass

    # prediction(1)-------------------------------------------
    # Bug fix: the row used to be appended to tr_input_mat, which left
    # ts_input_mat empty and made the prediction run on an empty matrix.
    ts_input_mat = [_lagged_diffs(ii)]
    ts_input_arr = np.array(ts_input_mat)
    dtest = xgb.DMatrix(ts_input_arr)
    pred = bst.predict(dtest)
    predicted_prob_funda = pred[0]
    predicted_angle_funda = 1 if predicted_prob_funda >= 0.5 else 0
    #--------------------------------------------------------

    # prediction(2)------------------------------------------
    # NOTE(review): last_not_zero_rate is unbound if the first rate seen
    # here is 0 -- confirm whether the input can contain zeros.
    if exchange_rates[ii - 1] != 0:
        last_not_zero_rate = exchange_rates[ii - 1]
    ts_input_mat = [
        [exchange_rates[ii],
         (exchange_rates[ii] - last_not_zero_rate) / last_not_zero_rate,
         get_rsi(exchange_rates, ii),
         get_ma(exchange_rates, ii),
         get_ma_kairi(exchange_rates, ii),
         get_bb_1(exchange_rates, ii),
         get_bb_2(exchange_rates, ii),
         get_ema(exchange_rates, ii),
         get_ema_rsi(exchange_rates, ii),
         get_cci(exchange_rates, ii),
         get_mo(exchange_rates, ii),
         get_lw(exchange_rates, ii),
         get_ss(exchange_rates, ii),
         get_dmi(exchange_rates, ii),
         get_vorarity(exchange_rates, ii),
         get_macd(exchange_rates, ii),
         judge_chart_type(exchange_rates[ii - CHART_TYPE_JDG_LEN:ii])]
    ]
    ts_input_arr = np.array(ts_input_mat)
    dtest = xgb.DMatrix(ts_input_arr)
    pred = bst_tec.predict(dtest)
    predicted_prob_tec = pred[0]
    predicted_angle_tec = 1 if predicted_prob_tec >= 0.5 else 0
    #--------------------------------------------------------

    # Only score rows on which both models predict the same direction.
    if predicted_angle_funda == predicted_angle_tec:
        try_cnt += 1
        if exchange_rates_diff[ii + 1] >= 0 and predicted_angle_funda == 1:
            correct_cnt += 1
        if exchange_rates_diff[ii + 1] < 0 and predicted_angle_funda == 0:
            correct_cnt += 1

# Guard against division by zero when the models never agreed.
if try_cnt:
    print(correct_cnt / float(try_cnt))
else:
    print("no rows where both models agreed")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import numpy as np | |
import scipy.sparse | |
import xgboost as xgb | |
import pickle | |
import talib as ta | |
from datetime import datetime as dt | |
import pytz | |
OUTPUT_LEN = 1 | |
TRAINDATA_DIV = 10 | |
CHART_TYPE_JDG_LEN = 25 | |
# Chart type codes: 0 -> flat, 1 -> rising line, 2 -> falling line, 3 -> peak inside the window, 4 -> trough inside the window
def judge_chart_type(data_arr):
    """Classify a window of values by where its extremes are located.

    Only the index of the first maximum and of the first minimum are used.

    Args:
        data_arr: sequence of numbers supporting iteration and ``len()``.

    Returns:
        0 -- flat or empty window, or both extremes strictly inside it
        1 -- minimum at the first element and maximum at the last
        2 -- maximum at the first element and minimum at the last
        3 -- only the maximum lies strictly inside the window
        4 -- only the minimum lies strictly inside the window
    """
    # Seed the maximum with -inf (the original used 0, which left the
    # max index unbound for windows without a positive value).
    max_val = float("-inf")
    min_val = float("inf")
    max_idx = None
    min_idx = None
    # Strict comparisons keep the first occurrence when values tie.
    for idx, value in enumerate(data_arr):
        if value > max_val:
            max_val = value
            max_idx = idx
        if value < min_val:
            min_val = value
            min_idx = idx

    # Empty window (or nothing comparable): treat as flat instead of crashing.
    if max_idx is None or min_idx is None:
        return 0
    if max_val == min_val:
        return 0

    last_idx = len(data_arr) - 1
    max_inside = max_idx not in (0, last_idx)
    min_inside = min_idx not in (0, last_idx)

    if min_idx == 0 and max_idx == last_idx:
        return 1
    if max_idx == 0 and min_idx == last_idx:
        return 2
    if max_inside and min_inside:
        return 0
    if max_inside:
        return 3
    if min_inside:
        return 4
    return 0
def get_rsi(price_arr, cur_pos, period=40):
    """Return the last ``ta.RSI`` value for the samples just before ``cur_pos``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: RSI period; the window holds ``period + 1`` samples.

    Returns:
        0 when ``cur_pos <= period`` (not enough history), otherwise the
        final element of the array returned by ``ta.RSI``.
    """
    if cur_pos <= period:
        return 0

    window_start = cur_pos - (period + 1)
    # The window is handed to TA-Lib in reversed order.
    # NOTE(review): confirm that newest-first ordering is intended.
    reversed_window = price_arr[window_start:cur_pos][::-1]
    samples = np.array(reversed_window, dtype=float)
    return ta.RSI(samples, timeperiod=period)[-1]
def get_ma(price_arr, cur_pos, period=20):
    """Return the last ``ta.SMA`` value for the samples just before ``cur_pos``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: SMA period and maximum window length.

    Returns:
        The final element of the array returned by ``ta.SMA``. When fewer
        than ``period`` samples precede ``cur_pos`` the window is simply
        shorter (it starts at index 0).
    """
    window_start = max(cur_pos - period, 0)
    # The window is passed to TA-Lib in reversed order.
    reversed_window = price_arr[window_start:cur_pos][::-1]
    samples = np.array(reversed_window, dtype=float)
    return ta.SMA(samples, timeperiod=period)[-1]
def get_ma_kairi(price_arr, cur_pos, period=None):
    """Return how far the price at ``cur_pos`` deviates from its moving average.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample.
        period: moving-average period forwarded to ``get_ma``; ``None``
            (the default) keeps ``get_ma``'s own default period.

    Returns:
        ``(price - ma) / ma * 100.0`` where ``ma`` comes from ``get_ma``.
    """
    # Previously ``period`` was accepted but never used; forward it when given.
    if period is None:
        ma = get_ma(price_arr, cur_pos)
    else:
        ma = get_ma(price_arr, cur_pos, period)
    return ((price_arr[cur_pos] - ma) / ma) * 100.0
def get_bb_1(price_arr, cur_pos, period=40):
    """Return the last value of the first band produced by ``ta.BBANDS``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: Bollinger period and maximum window length.

    Returns:
        Final element of output 0 of ``ta.BBANDS`` (the upper band according
        to the TA-Lib documentation).
    """
    window_start = cur_pos - period if cur_pos > period else 0
    # The window is passed to TA-Lib in reversed order.
    reversed_window = list(reversed(price_arr[window_start:cur_pos]))
    bands = ta.BBANDS(np.array(reversed_window, dtype=float), timeperiod=period)
    return bands[0][-1]
def get_bb_2(price_arr, cur_pos, period=40):
    """Return the last value of the third band produced by ``ta.BBANDS``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: Bollinger period and maximum window length.

    Returns:
        Final element of output 2 of ``ta.BBANDS`` (the lower band according
        to the TA-Lib documentation).
    """
    window_start = cur_pos - period if cur_pos > period else 0
    # The window is passed to TA-Lib in reversed order.
    reversed_window = list(reversed(price_arr[window_start:cur_pos]))
    bands = ta.BBANDS(np.array(reversed_window, dtype=float), timeperiod=period)
    return bands[2][-1]
def get_ema(price_arr, cur_pos, period=20):
    """Return the last ``ta.EMA`` value for the samples just before ``cur_pos``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: EMA period and maximum window length.

    Returns:
        The final element of the array returned by ``ta.EMA``.
    """
    window_start = max(cur_pos - period, 0)
    # The window is passed to TA-Lib in reversed order.
    reversed_window = price_arr[window_start:cur_pos][::-1]
    samples = np.array(reversed_window, dtype=float)
    return ta.EMA(samples, timeperiod=period)[-1]
def get_ema_rsi(price_arr, cur_pos, period=None):
    """Unimplemented indicator stub: ignores its arguments and returns 0.

    The value is still placed in the feature vectors built by the script,
    so that column is constant.
    """
    return 0
def get_cci(price_arr, cur_pos, period=None):
    """Unimplemented indicator stub: ignores its arguments and returns 0.

    The value is still placed in the feature vectors built by the script,
    so that column is constant.
    """
    return 0
def get_mo(price_arr, cur_pos, period=20):
    """Return the last ``ta.CMO`` value for the samples just before ``cur_pos``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: CMO period; the window holds ``period + 1`` samples.

    Returns:
        0 when ``cur_pos <= period + 1`` (not enough history), otherwise the
        final element of the array returned by ``ta.CMO``.
    """
    span = period + 1
    if cur_pos <= span:
        return 0

    # The window is passed to TA-Lib in reversed order.
    reversed_window = price_arr[cur_pos - span:cur_pos][::-1]
    samples = np.array(reversed_window, dtype=float)
    return ta.CMO(samples, timeperiod=period)[-1]
def get_po(price_arr, cur_pos, period=10):
    """Return the last ``ta.PPO`` value for the samples just before ``cur_pos``.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: maximum window length (only used for slicing).

    Returns:
        The final element of the array returned by ``ta.PPO``.
    """
    window_start = 0 if cur_pos <= period else cur_pos - period
    # The window is passed to TA-Lib in reversed order.
    reversed_window = list(reversed(price_arr[window_start:cur_pos]))
    samples = np.array(reversed_window, dtype=float)
    # NOTE(review): ``period`` is not forwarded, so ta.PPO runs with its own
    # default periods; with a 10-sample window the result is presumably NaN
    # -- confirm against the TA-Lib defaults.
    return ta.PPO(samples)[-1]
def get_lw(price_arr, cur_pos, period=None):
    """Unimplemented indicator stub: ignores its arguments and returns 0.

    The value is still placed in the feature vectors built by the script,
    so that column is constant.
    """
    return 0
def get_ss(price_arr, cur_pos, period=None):
    """Unimplemented indicator stub: ignores its arguments and returns 0.

    The value is still placed in the feature vectors built by the script,
    so that column is constant.
    """
    return 0
def get_dmi(price_arr, cur_pos, period=None):
    """Unimplemented indicator stub: ignores its arguments and returns 0.

    The value is still placed in the feature vectors built by the script,
    so that column is constant.
    """
    return 0
def get_vorarity(price_arr, cur_pos, period=None):
    """Return the standard deviation of step-to-step changes before ``cur_pos``.

    The window is the ``CHART_TYPE_JDG_LEN`` samples preceding ``cur_pos``
    (``cur_pos`` itself excluded). The first sample contributes a change
    of 0. ``period`` is accepted but not used.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample.
        period: unused.

    Returns:
        ``np.std`` of the list of changes.
    """
    window = price_arr[cur_pos - CHART_TYPE_JDG_LEN:cur_pos]
    changes = []
    previous = -1  # sentinel meaning "no previous sample yet"
    for current in window:
        changes.append(0 if previous == -1 else current - previous)
        previous = current
    return np.std(changes)
def get_macd(price_arr, cur_pos, period=100):
    """Return 1 when the last MACD value is above its signal line, else 0.

    Args:
        price_arr: sliceable sequence of prices.
        cur_pos: index of the current sample; it is excluded from the window.
        period: maximum window length handed to ``ta.MACD``.

    Returns:
        1 if ``macd[-1] > macdsignal[-1]`` for ``ta.MACD`` run with
        fast/slow/signal periods 12/26/9, otherwise 0.
    """
    window_start = max(cur_pos - period, 0)
    # The window is passed to TA-Lib in reversed order.
    reversed_window = price_arr[window_start:cur_pos][::-1]
    samples = np.array(reversed_window, dtype=float)
    macd_line, signal_line, _ = ta.MACD(
        samples, fastperiod=12, slowperiod=26, signalperiod=9)
    return 1 if macd_line[-1] > signal_line[-1] else 0
def is_weekend(date_str):
    """Tell whether a Tokyo-local timestamp is a Saturday or Sunday in London.

    Args:
        date_str: timestamp that matches ``%Y-%m-%d %H:%M:%S`` once every
            ``.`` has been replaced by ``-``.

    Returns:
        True when the London-time weekday is 5 or 6, otherwise False.
    """
    normalized = date_str.replace(".", "-")
    naive_time = dt.strptime(normalized, '%Y-%m-%d %H:%M:%S')
    # Interpret the parsed time as Asia/Tokyo, then view it in Europe/London.
    tokyo_time = pytz.timezone('Asia/Tokyo').localize(naive_time)
    london_time = tokyo_time.astimezone(pytz.timezone('Europe/London'))
    return london_time.weekday() in (5, 6)
# ---------------------------------------------------------------------------
# main
#
# 1. Load ./dji2000_2009_close.csv (timestamp, value per row).
# 2. Build a mirrored copy of the series in which every step is negated.
# 3. Train a booster on indicator features of both series and save it to
#    ./dji.model.
# 4. Evaluate direction predictions on the rows after train_len.
# ---------------------------------------------------------------------------
exchange_dates = []
exchange_rates = []

# The file is opened in a ``with`` block so the handle is always closed.
with open('./dji2000_2009_close.csv', 'r') as rates_fd:
    for line in rates_fd:
        splited = line.split(",")
        time = splited[0]
        val = float(splited[1])
        exchange_dates.append(time)
        exchange_rates.append(val)

# Mirrored series: starts at the same value, then applies each original
# step with the opposite sign.
reverse_exchange_rates = []
for idx, rate in enumerate(exchange_rates):
    if idx == 0:
        reverse_exchange_rates.append(rate)
    else:
        step = rate - exchange_rates[idx - 1]
        reverse_exchange_rates.append(reverse_exchange_rates[-1] - step)

data_len = len(exchange_rates)
# The last 126 rows are kept out of training.
train_len = data_len - 126

print("data size: " + str(data_len))
print("train len: " + str(train_len))


def _feature_row(rates, pos):
    """Build the 17-element indicator feature vector for rates at pos."""
    return [
        rates[pos],
        (rates[pos] - rates[pos - 1]) / rates[pos - 1],
        get_rsi(rates, pos),
        get_ma(rates, pos),
        get_ma_kairi(rates, pos),
        get_bb_1(rates, pos),
        get_bb_2(rates, pos),
        get_ema(rates, pos),
        get_ema_rsi(rates, pos),
        get_cci(rates, pos),
        get_mo(rates, pos),
        get_lw(rates, pos),
        get_ss(rates, pos),
        get_dmi(rates, pos),
        get_vorarity(rates, pos),
        get_macd(rates, pos),
        judge_chart_type(rates[pos - CHART_TYPE_JDG_LEN:pos]),
    ]


# Toggle: set to True to reuse a previously saved model.
if False:
    bst = xgb.Booster({'nthread': 4})
    bst.load_model("./dji.model")

if True:  ### training start
    tr_input_mat = []
    tr_angle_mat = []
    for i in range(50, train_len, OUTPUT_LEN):
        # One sample from the real series and one from the mirrored series;
        # rows and labels are appended in the same order.
        for series in (exchange_rates, reverse_exchange_rates):
            tr_input_mat.append(_feature_row(series, i))
        for series in (exchange_rates, reverse_exchange_rates):
            tmp = (series[i + OUTPUT_LEN] - series[i]) / float(OUTPUT_LEN)
            tr_angle_mat.append(1 if tmp >= 0 else 0)

    tr_input_arr = np.array(tr_input_mat)
    tr_angle_arr = np.array(tr_angle_mat)
    dtrain = xgb.DMatrix(tr_input_arr, label=tr_angle_arr)
    # 'subsample' was misspelled 'subsumble', which is not an XGBoost
    # parameter, so the 0.5 row-subsampling setting never took effect.
    param = {'max_depth': 6, 'eta': 0.2, 'subsample': 0.5, 'silent': 1,
             'objective': 'binary:logistic'}
    watchlist = [(dtrain, 'train')]
    num_round = 3000
    bst = xgb.train(param, dtrain, num_round, watchlist)
    bst.dump_model('./dump.raw.txt')
    bst.save_model('./dji.model')
### training end

all_cnt = 0
correct_cnt = 0
for window_s in range((data_len - train_len) - (OUTPUT_LEN + 1)):
    current_spot = train_len + window_s + OUTPUT_LEN

    # prediction
    ts_input_arr = np.array([_feature_row(exchange_rates, current_spot)])
    dtest = xgb.DMatrix(ts_input_arr)
    pred = bst.predict(dtest)
    predicted_prob = pred[0]

    all_cnt += 1
    # Score against the direction of the very next step.
    diff = exchange_rates[current_spot + 1] - exchange_rates[current_spot]
    if predicted_prob >= 0.5 and diff >= 0:
        correct_cnt += 1
    elif predicted_prob < 0.5 and diff < 0:
        correct_cnt += 1

# Guard against division by zero when no row was evaluated.
if all_cnt:
    print(correct_cnt / float(all_cnt))
else:
    print("no rows were evaluated")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment