Skip to content

Instantly share code, notes, and snippets.

@ryogrid
Created March 25, 2017 02:51
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryogrid/e5274ac31a5b9ac02e8a9fe33b8ac921 to your computer and use it in GitHub Desktop.
Save ryogrid/e5274ac31a5b9ac02e8a9fe33b8ac921 to your computer and use it in GitHub Desktop.
Validation of "Twitter mood predicts the stock market"
2009/1/1 0 0
2009/1/2 0 9034.69043
2009/1/3 0 0
2009/1/4 0 0
2009/1/5 0 8952.889648
2009/1/6 0 9015.099609
2009/1/7 0 8769.700195
2009/1/8 0 8742.459961
2009/1/9 0 8599.179688
2009/1/10 0 0
2009/1/11 0 0
2009/1/12 0 8473.969727
2009/1/13 0 8448.55957
2009/1/14 0 8200.139648
2009/1/15 0 8212.490234
2009/1/16 0 8281.219727
2009/1/17 0.25 0
2009/1/18 0.2 0
2009/1/19 0 0
2009/1/20 0.1 7949.089844
2009/1/21 0 8228.099609
2009/1/22 0 8122.799805
2009/1/23 0 8077.560059
2009/1/24 0.25 0
2009/1/25 0 0
2009/1/26 0 8116.029785
2009/1/27 0 8174.72998
2009/1/28 0 8375.450195
2009/1/29 0.047619048 8149.009766
2009/1/30 0 8000.859863
2009/1/31 0 0
2009/2/1 0 0
2009/2/2 0 7936.75
2009/2/3 0 8078.359863
2009/2/4 0 7956.660156
2009/2/5 0 8063.069824
2009/2/6 0 8280.589844
2009/2/7 0 0
2009/2/8 0 0
2009/2/9 0 8270.870117
2009/2/10 0 7888.879883
2009/2/11 0 7939.529785
2009/2/12 0 7932.759766
2009/2/13 0 7850.410156
2009/2/14 0 0
2009/2/15 0 0
2009/2/16 0 0
2009/2/17 0 7552.600098
2009/2/18 0 7555.629883
2009/2/19 0 7465.950195
2009/2/20 0.05 7365.669922
2009/2/21 0 0
2009/2/22 0 0
2009/2/23 0 7114.779785
2009/2/24 0 7350.939941
2009/2/25 0 7270.890137
2009/2/26 0.095238095 7182.080078
2009/2/27 0 7062.930176
2009/2/28 0 0
2009/3/1 0 0
2009/3/2 0 6763.290039
2009/3/3 0 6726.02002
2009/3/4 0 6875.839844
2009/3/5 0 6594.439941
2009/3/6 0.066666667 6626.939941
2009/3/7 0 0
2009/3/8 0 0
2009/3/9 0 6547.049805
2009/3/10 0.058823529 6926.490234
2009/3/11 0 6930.399902
2009/3/12 0 7170.060059
2009/3/13 0 7223.97998
2009/3/14 0 0
2009/3/15 0 0
2009/3/16 0 7216.970215
2009/3/17 0.052631579 7395.700195
2009/3/18 0.047619048 7486.580078
2009/3/19 0.026315789 7400.799805
2009/3/20 0.025641026 7278.379883
2009/3/21 0.05 0
2009/3/22 0.047619048 0
2009/3/23 0.047619048 7775.859863
2009/3/24 0.026315789 7659.970215
2009/3/25 0.025 7749.810059
2009/3/26 0 7924.560059
2009/3/27 0 7776.180176
2009/3/28 0.068965517 0
2009/3/29 0 0
2009/3/30 0 7522.02002
2009/3/31 0 7608.919922
2009/4/1 0 7761.600098
2009/4/2 0 7978.080078
2009/4/3 0 8017.589844
2009/4/4 0 0
2009/4/5 0 0
2009/4/6 0 7975.850098
2009/4/7 0 7789.560059
2009/4/8 0.028571429 7837.109863
2009/4/9 0 8083.379883
2009/4/10 0 0
2009/4/11 0 0
2009/4/12 0.033333333 0
2009/4/13 0 8057.810059
2009/4/14 0 7920.180176
2009/4/15 0 8029.620117
2009/4/16 0 8125.430176
2009/4/17 0.015873016 8131.330078
2009/4/18 0 0
2009/4/19 0.035714286 0
2009/4/20 0 7841.72998
2009/4/21 0 7969.560059
2009/4/22 0.02739726 7886.569824
2009/4/23 0.015151515 7957.060059
2009/4/24 0 8076.290039
2009/4/25 0 0
2009/4/26 0 0
2009/4/27 0 8025
2009/4/28 0 8016.950195
2009/4/29 0.016129032 8185.72998
2009/4/30 0 8168.120117
2009/5/1 0 8212.410156
2009/5/2 0 0
2009/5/3 0.02173913 0
2009/5/4 0 8426.740234
2009/5/5 0.016393443 8410.650391
2009/5/6 0.015873016 8512.280273
2009/5/7 0.017857143 8409.849609
2009/5/8 0.022727273 8574.650391
2009/5/9 0 0
2009/5/10 0 0
2009/5/11 0.017241379 8418.769531
2009/5/12 0 8469.110352
2009/5/13 0.034482759 8284.889648
2009/5/14 0.037037037 8331.320312
2009/5/15 0 8268.639648
2009/5/16 0 0
2009/5/17 0.020833333 0
2009/5/18 0.033898305 8504.080078
2009/5/19 0.065789474 8474.849609
2009/5/20 0.025974026 8422.040039
2009/5/21 0 8292.129883
2009/5/22 0 8277.320312
2009/5/23 0.02 0
2009/5/24 0 0
2009/5/25 0.038461538 0
2009/5/26 0 8473.490234
2009/5/27 0 8300.019531
2009/5/28 0.01754386 8403.799805
2009/5/29 0.016393443 8500.330078
2009/5/30 0 0
2009/5/31 0.025641026 0
2009/6/1 0.014705882 8721.44043
2009/6/2 0.014084507 8740.870117
2009/6/3 0.014925373 8675.240234
2009/6/4 0.025316456 8750.240234
2009/6/5 0 8763.129883
2009/6/6 0.017857143 0
2009/6/7 0.068181818 0
2009/6/8 0 8764.490234
2009/6/9 0 8763.05957
2009/6/10 0.02739726 8739.019531
2009/6/11 0.028169014 8770.919922
2009/6/12 0.027777778 8799.259766
2009/6/13 0.021276596 0
2009/6/14 0 0
2009/6/15 0.012048193 8612.129883
2009/6/16 0.014492754 8504.669922
2009/6/17 0.025316456 8497.179688
2009/6/18 0.014285714 8555.599609
2009/6/19 0 8539.730469
2009/6/20 0.013157895 0
2009/6/21 0.013513514 0
2009/6/22 0 8339.009766
2009/6/23 0.024691358 8322.910156
2009/6/24 0.04 8299.860352
2009/6/25 0.034883721 8472.400391
2009/6/26 0 8438.389648
2009/6/27 0.013157895 0
2009/6/28 0 0
2009/6/29 0.01 8529.379883
2009/6/30 0 8447
2009/7/1 0.011111111 8504.05957
2009/7/2 0 8280.740234
2009/7/3 0.025641026 0
2009/7/4 0.030769231 0
2009/7/5 0 0
2009/7/6 0.012195122 8324.870117
2009/7/7 0.009009009 8163.600098
2009/7/8 0.018518519 8178.410156
2009/7/9 0.029411765 8183.169922
2009/7/10 0.009433962 8146.52002
2009/7/11 0.014925373 0
2009/7/12 0 0
2009/7/13 0.007042254 8331.679688
2009/7/14 0.008547009 8359.490234
2009/7/15 0.023809524 8616.209961
2009/7/16 0.016949153 8711.820312
2009/7/17 0.02173913 8743.94043
2009/7/18 0.046511628 0
2009/7/19 0.011235955 0
2009/7/20 0.008130081 8848.150391
2009/7/21 0.014084507 8915.94043
2009/7/22 0.020689655 8881.259766
2009/7/23 0.008196721 9069.290039
2009/7/24 0.015625 9093.240234
2009/7/25 0.03030303 0
2009/7/26 0.011363636 0
2009/7/27 0.022222222 9108.509766
2009/7/28 0.016393443 9096.719727
2009/7/29 0.013605442 9070.719727
2009/7/30 0.0078125 9154.459961
2009/7/31 0.02 9171.610352
2009/8/1 0.017699115 0
2009/8/2 0.009708738 0
2009/8/3 0.02238806 9286.55957
2009/8/4 0.014492754 9320.19043
2009/8/5 0.020408163 9280.969727
2009/8/6 0.021505376 9256.259766
2009/8/7 0.024 9370.070312
2009/8/8 0 0
2009/8/9 0.0078125 0
2009/8/10 0.016759777 9337.950195
2009/8/11 0.020408163 9241.450195
2009/8/12 0.019736842 9361.610352
2009/8/13 0.011976048 9398.19043
2009/8/14 0.032679739 9321.400391
2009/8/15 0 0
2009/8/16 0.017094017 0
2009/8/17 0.01754386 9135.339844
2009/8/18 0.015957447 9217.94043
2009/8/19 0.005181347 9279.160156
2009/8/20 0.013392857 9350.049805
2009/8/21 0.022522523 9505.959961
2009/8/22 0.006756757 0
2009/8/23 0.025641026 0
2009/8/24 0.014084507 9509.280273
2009/8/25 0.016393443 9539.290039
2009/8/26 0.018604651 9543.519531
2009/8/27 0.012711864 9580.629883
2009/8/28 0.006711409 9544.200195
2009/8/29 0.030150754 0
2009/8/30 0.013392857 0
2009/8/31 0.012903226 9496.280273
2009/9/1 0.01552795 9310.599609
2009/9/2 0.025210084 9280.669922
2009/9/3 0.005221932 9344.610352
2009/9/4 0.007915567 9441.269531
2009/9/5 0.02295082 0
2009/9/6 0.018867925 0
2009/9/7 0.028795812 0
2009/9/8 0.006198347 9497.339844
2009/9/9 0.013761468 9547.219727
2009/9/10 0.007604563 9627.480469
2009/9/11 0.010701546 9605.410156
2009/9/12 0.020807834 0
2009/9/13 0.01965812 0
2009/9/14 0.016646849 9626.799805
2009/9/15 0.015573227 9683.410156
2009/9/16 0.015588096 9791.709961
2009/9/17 0.012680115 9783.919922
2009/9/18 0.014712246 9820.200195
2009/9/19 0.016663106 0
2009/9/20 0.016687737 0
2009/9/21 0.009681881 9778.860352
2009/9/22 0.012438531 9829.870117
2009/9/23 0.014212621 9748.549805
2009/9/24 0.012684989 9707.44043
2009/9/25 0.018932039 9665.19043
2009/9/26 0.021929825 0
2009/9/27 0.023674242 0
2009/9/28 0.023346304 9789.360352
2009/9/29 0.010989011 9742.200195
2009/9/30 0.016339869 9712.280273
2009/10/1 0.025547445 9509.280273
2009/10/2 0 9487.669922
2009/10/3 0.007142857 0
2009/10/4 0.013574661 0
2009/10/5 0.014084507 9599.75
2009/10/6 0.016025641 9731.25
2009/10/7 0.011428571 9725.580078
2009/10/8 0.024570025 9786.870117
2009/10/9 0.017156863 9864.94043
2009/10/10 0.028169014 0
2009/10/11 0.017191977 0
2009/10/12 0.015243902 9885.799805
2009/10/13 0.014583333 9871.05957
2009/10/14 0.022312373 10015.86035
2009/10/15 0.012891344 10062.94043
2009/10/16 0.013888889 9995.910156
2009/10/17 0.019230769 0
2009/10/18 0.039215686 0
2009/10/19 0.013752456 10092.19043
2009/10/20 0.011090573 10041.48047
2009/10/21 0.006700168 9949.360352
2009/10/22 0.021374046 10081.30957
2009/10/23 0.021472393 9972.179688
2009/10/24 0.021568627 0
2009/10/25 0.018998273 0
2009/10/26 0.013297872 9867.959961
2009/10/27 0.010452962 9882.169922
2009/10/28 0.02 9762.69043
2009/10/29 0.015518914 9962.580078
2009/10/30 0.014519056 9712.730469
2009/10/31 0.013245033 0
2009/11/1 0.022704837 0
2009/11/2 0.014285714 9789.44043
2009/11/3 0.012682308 9771.910156
2009/11/4 0.017184035 9802.139648
2009/11/5 0.012458074 10005.95996
2009/11/6 0.016970725 10023.41992
2009/11/7 0.018216683 0
2009/11/8 0.021505376 0
2009/11/9 0.018459354 10226.94043
2009/11/10 0.01679808 10246.96973
2009/11/11 0.017414861 10291.25977
2009/11/12 0.013504155 10197.46973
2009/11/13 0.013465854 10270.46973
2009/11/14 0.016851441 0
2009/11/15 0.013636364 0
2009/11/16 0.015161725 10406.95996
2009/11/17 0.018175463 10437.41992
2009/11/18 0.015157116 10426.30957
2009/11/19 0.01795977 10332.44043
2009/11/20 0.018794326 10318.16016
2009/11/21 0.017169615 0
2009/11/22 0.018613309 0
2009/11/23 0.016086957 10450.9502
2009/11/24 0.021071115 10433.70996
2009/11/25 0.016480853 10464.40039
2009/11/26 0.010323469 0
2009/11/27 0.018628281 10309.91992
2009/11/28 0.023310023 0
2009/11/29 0.017351598 0
2009/11/30 0.022164276 10344.83984
2009/12/1 0.022036474 10471.58008
2009/12/2 0.01710864 10452.67969
2009/12/3 0.016496465 10366.15039
2009/12/4 0.022570533 10388.90039
2009/12/5 0.019125683 0
2009/12/6 0.018083183 0
2009/12/7 0.011235955 10390.11035
2009/12/8 0.010526316 10285.96973
#!/usr/bin/python
import numpy as np
import scipy.sparse
import xgboost as xgb
import pickle
import talib as ta
from datetime import datetime as dt
import pytz
# Number of consecutive past days (per series) that make up one feature row.
INPUT_LEN = 3
# The first 1/TRAINDATA_DIV of the rows is used for training.
TRAINDATA_DIV = 2
# Input data: one "date,calm,price" row per calendar day.
RATES_PATH = './calm_dji_2009.csv'
# Where the trained booster is stored / loaded from.
MODEL_PATH = './calm.model'
# Set to True to load MODEL_PATH instead of training a new model.
LOAD_SAVED_MODEL = False


def load_series(lines):
    """Parse comma-separated ``date,calm,price`` rows.

    Args:
        lines: iterable of text lines (e.g. an open file). Blank lines are
            skipped.

    Returns:
        A 5-tuple ``(dates, calm_rates, calm_rates_diff, exchange_rates,
        exchange_rates_diff)`` of equally long lists. Each ``*_diff`` entry is
        the value of that row minus the value of the previous row.

    Raises:
        ValueError: if a numeric column cannot be parsed as float.
        IndexError: if a non-blank row has fewer than three columns.
    """
    dates = []
    calm_rates = []
    calm_rates_diff = []
    exchange_rates = []
    exchange_rates_diff = []
    # Seed values used for the first row's difference (kept from the
    # original script).
    prev_calm = 0
    prev_exch = 10000
    for raw_line in lines:
        row = raw_line.strip()
        if not row:
            continue
        fields = row.split(",")
        calm_val = float(fields[1])
        exch_val = float(fields[2])
        dates.append(fields[0])
        calm_rates.append(calm_val)
        calm_rates_diff.append(calm_val - prev_calm)
        exchange_rates.append(exch_val)
        exchange_rates_diff.append(exch_val - prev_exch)
        prev_calm = calm_val
        prev_exch = exch_val
    return (dates, calm_rates, calm_rates_diff,
            exchange_rates, exchange_rates_diff)


def build_features(ii, exchange_diff, calm_diff, input_len=INPUT_LEN):
    """Return the feature row for position ``ii``.

    The row is the ``input_len`` price differences at indices
    ``ii - input_len .. ii - 1`` followed by the calm differences at the same
    indices. Callers must pass ``ii >= input_len``.
    """
    start = ii - input_len
    return list(exchange_diff[start:ii]) + list(calm_diff[start:ii])


def direction_label(diff):
    """Return 1 when ``diff`` is non-negative (flat or up), otherwise 0."""
    return 1 if diff >= 0 else 0


def main():
    """Train (or load) the booster, report directional accuracy, save it."""
    # 'with' guarantees the file is closed even if parsing fails.
    with open(RATES_PATH, 'r') as rates_fd:
        (_dates, _calm_rates, calm_rates_diff,
         exchange_rates, exchange_rates_diff) = load_series(rates_fd)

    data_len = len(exchange_rates)
    # Floor division keeps train_len an int on both Python 2 and Python 3.
    train_len = data_len // TRAINDATA_DIV
    print("data size: " + str(data_len))
    print("train len: " + str(train_len))

    # NOTE(review): rows with a price of 0 appear to be non-trading days; the
    # differences next to them are dominated by the jump to/from 0. They are
    # still used as features/labels here, as in the original script.
    if LOAD_SAVED_MODEL:
        bst = xgb.Booster({'nthread': 4})
        bst.load_model(MODEL_PATH)
    else:
        tr_input_mat = []
        tr_angle_mat = []
        for ii in range(INPUT_LEN, train_len):
            tr_input_mat.append(
                build_features(ii, exchange_rates_diff, calm_rates_diff))
            # The label is the direction of the difference at ii + 1.
            tr_angle_mat.append(direction_label(exchange_rates_diff[ii + 1]))

        dtrain = xgb.DMatrix(np.array(tr_input_mat),
                             label=np.array(tr_angle_mat))
        # 'subsample' was misspelled as 'subsumble' before, so the 0.5 row
        # subsampling was never passed to xgboost under its real name.
        param = {'max_depth': 6, 'eta': 0.2, 'subsample': 0.5, 'silent': 1,
                 'objective': 'binary:logistic'}
        watchlist = [(dtrain, 'train')]
        num_round = 3000
        bst = xgb.train(param, dtrain, num_round, watchlist)

    try_cnt = 0
    correct_cnt = 0
    # NOTE(review): starting at train_len - INPUT_LEN re-evaluates the last
    # INPUT_LEN training samples; confirm whether train_len was intended.
    # max() keeps the feature window from reaching below index 0.
    first_test = max(train_len - INPUT_LEN, INPUT_LEN)
    for ii in range(first_test, data_len - 1):
        # Skip days whose target price is 0 (previously a no-op 'pass').
        if exchange_rates[ii + 1] == 0:
            continue

        # A single-row matrix; the row used to be appended to the training
        # list by mistake, leaving the test matrix empty.
        ts_input_arr = np.array(
            [build_features(ii, exchange_rates_diff, calm_rates_diff)])
        dtest = xgb.DMatrix(ts_input_arr)
        predicted_prob = bst.predict(dtest)[0]
        predicted_angle = 1 if predicted_prob >= 0.5 else 0

        try_cnt += 1
        if predicted_angle == direction_label(exchange_rates_diff[ii + 1]):
            correct_cnt += 1

    if try_cnt:
        print(correct_cnt / float(try_cnt))
    else:
        # Avoid ZeroDivisionError when nothing could be evaluated.
        print("no test samples")

    bst.save_model(MODEL_PATH)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment