blog-mlp-scripts
import numpy as np
import talib
import alpaca_trade_api as tradeapi
import pandas
import time
import os

# Creates the dataset folders in the directory the script is run from
os.makedirs("./train", exist_ok=True)
os.makedirs("./eval", exist_ok=True)

# Uncomment and fill in your credentials:
# api = tradeapi.REST(key_id=<your key id>, secret_key=<your secret key>)
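# A minimal sketch of filling those in from the environment (the
# ALPACA_KEY_ID / ALPACA_SECRET_KEY variable names are illustrative,
# not part of the original script):
# api = tradeapi.REST(key_id=os.environ["ALPACA_KEY_ID"],
#                     secret_key=os.environ["ALPACA_SECRET_KEY"])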
barTimeframe = "1D"  # 1Min, 5Min, 15Min, 1H, 1D
assetList = np.loadtxt(
    "assetList.txt",
    comments="#",
    delimiter=",",
    unpack=False,
    dtype="str")
# ISO8601 date format
trainStartDate = "2015-01-01T00:00:00.000Z"
trainEndDate = "2017-06-01T00:00:00.000Z"
evalStartDate = "2017-06-01T00:00:00.000Z"
evalEndDate = "2018-06-01T00:00:00.000Z"

targetLookaheadPeriod = 1
startCutoffPeriod = 50  # Set to the longest indicator period used (RSI50)

# Tracks the position in the list of symbols to download
iteratorPos = 0
assetListLen = len(assetList)
while iteratorPos < assetListLen:
    try:
        symbol = assetList[iteratorPos]

        # Returns market data as a pandas dataframe
        returned_data = api.get_bars(
            symbol,
            barTimeframe,
            start_dt=trainStartDate,
            end_dt=evalEndDate).df

        # Processes all data into numpy arrays for use by talib
        timeList = np.array(returned_data.index)
        openList = np.array(returned_data.open, dtype=np.float64)
        highList = np.array(returned_data.high, dtype=np.float64)
        lowList = np.array(returned_data.low, dtype=np.float64)
        closeList = np.array(returned_data.close, dtype=np.float64)
        volumeList = np.array(returned_data.volume, dtype=np.float64)

        # Adjusts the data lists for the reward function's look-ahead period
        shiftedTimeList = timeList[:-targetLookaheadPeriod]
        shiftedClose = closeList[targetLookaheadPeriod:]
        highList = highList[:-targetLookaheadPeriod]
        lowList = lowList[:-targetLookaheadPeriod]
        closeList = closeList[:-targetLookaheadPeriod]

        # Calculates the trading indicators
        RSI14 = talib.RSI(closeList, 14)
        RSI50 = talib.RSI(closeList, 50)
        STOCH14K, STOCH14D = talib.STOCH(
            highList, lowList, closeList,
            fastk_period=14, slowk_period=3, slowd_period=3)
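        # Note: talib returns NaN during each indicator's warm-up period
        # (up to 50 rows here because of RSI50); the [startCutoffPeriod:]
        # slice below trims those rows out of the final dataframe.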
        # Calculates the network target / reward function for training
        closeDifference = shiftedClose - closeList
        closeDifferenceLen = len(closeDifference)

        # Creates a binary output depending on whether the market moves
        # up or down, for use as one-hot labels
        longOutput = np.zeros(closeDifferenceLen)
        longOutput[closeDifference >= 0] = 1
        shortOutput = np.zeros(closeDifferenceLen)
        shortOutput[closeDifference < 0] = 1

        # Constructs the dataframe and writes it to a CSV file
        outputDF = {
            # "close" is not used in network training, only kept for
            # later analysis
            "close": closeList,
            "RSI14": RSI14,
            "RSI50": RSI50,
            "STOCH14K": STOCH14K,
            "STOCH14D": STOCH14D,
            "longOutput": longOutput,
            "shortOutput": shortOutput
        }
        # Fixes the column order so the dataframe columns don't get mixed up
        columnOrder = ["close", "RSI14", "RSI50", "STOCH14K",
                       "STOCH14D", "longOutput", "shortOutput"]
        outputDF = pandas.DataFrame(
            data=outputDF,
            index=shiftedTimeList,
            columns=columnOrder)[startCutoffPeriod:]

        # Splits the data into training and evaluation sets
        trainingDF = outputDF[outputDF.index < evalStartDate]
        evalDF = outputDF[outputDF.index >= evalStartDate]

        if len(trainingDF) > 0 and len(evalDF) > 0:
            print("writing " + str(symbol) +
                  ", data len: " + str(len(closeList)))
            trainingDF.to_csv("./train/" + symbol + ".csv",
                              index_label="date")
            evalDF.to_csv("./eval/" + symbol + ".csv", index_label="date")
    except Exception as e:
        # Skips any symbol whose data could not be downloaded or processed
        print("skipping " + str(symbol) + ": " + str(e))

    time.sleep(5)  # To avoid API rate limits
    iteratorPos += 1
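For reference, each CSV written above begins with a header row like the following (the column order is fixed by columnOrder, and the "date" label comes from index_label):

date,close,RSI14,RSI50,STOCH14K,STOCH14D,longOutput,shortOutput

The second script below reads these CSVs back from ./train and ./eval and trains a small MLP classifier on the four indicator columns.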
from time import time
from os import listdir
from os.path import isfile, join
import pandas
import numpy as np
import tensorflow as tf
# Model settings
# Static seed to allow for reproducibility between training runs
tf.set_random_seed(12345)
trainingCycles = 500000  # Number of training steps before ending
batchSize = 1000  # Number of examples per training batch
summarySteps = 1000  # Number of training steps between each summary
dropout = 0.5  # Node dropout rate for training
nodeLayout = [40, 30, 20, 10]  # Number of nodes in each hidden layer
mainDirectory = "./model_1/"

trainFiles = [f for f in listdir("./train/") if isfile(join("./train/", f))]
evalFiles = [f for f in listdir("./eval/") if isfile(join("./eval/", f))]
# Initialises the data arrays
trainDataX = np.empty([0, 4])
trainDataY = np.empty([0, 2])
evalDataX = np.empty([0, 4])
evalDataY = np.empty([0, 2])

# Reads the training data into memory
readPos = 0
for fileName in trainFiles:
    importedData = pandas.read_csv("./train/" + fileName, sep=',')
    xValuesDF = importedData[["RSI14", "RSI50", "STOCH14K", "STOCH14D"]]
    yValuesDF = importedData[["longOutput", "shortOutput"]]
    xValues = np.array(xValuesDF.values.tolist())
    yValues = np.array(yValuesDF.values.tolist())
    trainDataX = np.concatenate([trainDataX, xValues], axis=0)
    trainDataY = np.concatenate([trainDataY, yValues], axis=0)
    if readPos % 50 == 0 and readPos > 0:
        print("Loaded " + str(readPos) + " training files")
    readPos += 1
print("\n\n")
# Reads the evaluation data into memory
readPos = 0
for fileName in evalFiles:
    importedData = pandas.read_csv("./eval/" + fileName, sep=',')
    xValuesDF = importedData[["RSI14", "RSI50", "STOCH14K", "STOCH14D"]]
    yValuesDF = importedData[["longOutput", "shortOutput"]]
    xValues = np.array(xValuesDF.values.tolist())
    yValues = np.array(yValuesDF.values.tolist())
    evalDataX = np.concatenate([evalDataX, xValues], axis=0)
    evalDataY = np.concatenate([evalDataY, yValues], axis=0)
    if readPos % 50 == 0 and readPos > 0:
        print("Loaded " + str(readPos) + " evaluation files")
    readPos += 1
print("\n\n")
# Samples random batches from the training data
def createTrainingBatch(amount):
    randomBatchPos = np.random.randint(0, trainDataX.shape[0], amount)
    xOut = trainDataX[randomBatchPos]
    yOut = trainDataY[randomBatchPos]
    return xOut, yOut

tf.logging.set_verbosity(tf.logging.INFO)
# ML training and evaluation function
def train():
    globalStepTensor = tf.Variable(0, trainable=False, name='global_step')
    sess = tf.InteractiveSession()

    # Placeholder for the input features
    x = tf.placeholder(tf.float32, [None, 4])
    # Placeholder for the one-hot labels
    y = tf.placeholder(tf.float32, [None, 2])
    # Placeholder for the node dropout rate
    internalDropout = tf.placeholder(tf.float32, None)

    net = x  # The input layer is the trading indicators

    # Creates the neural network model
    with tf.name_scope('network'):
        # Adds each dense layer to the network as specified by nodeLayout
        layerPos = 0
        for units in nodeLayout:
            net = tf.layers.dense(
                net,
                units=units,
                activation=tf.nn.tanh,
                name="dense" + str(units) + "_" + str(layerPos))
            # Dropout layer after each dense layer
            net = tf.layers.dropout(net, rate=internalDropout)
            layerPos += 1
        # Network output; left unscaled because the loss below applies
        # softmax itself
        logits = tf.layers.dense(net, 2, activation=None)

    with tf.name_scope('lossFunction'):
        cross_entropy_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=y, logits=logits))

    with tf.name_scope('trainingStep'):
        tf.summary.scalar('crossEntropyLoss', cross_entropy_loss)
        trainStep = tf.train.AdamOptimizer(0.0001).minimize(
            cross_entropy_loss, global_step=globalStepTensor)

    with tf.name_scope('accuracy'):
        correctPrediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correctPrediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()
    trainWriter = tf.summary.FileWriter(
        mainDirectory + '/train', sess.graph, flush_secs=1, max_queue=2)
    evalWriter = tf.summary.FileWriter(
        mainDirectory + '/eval', sess.graph, flush_secs=1, max_queue=2)
    tf.global_variables_initializer().run()

    # Saves the model at defined checkpoints and loads any available
    # model at start-up
    saver = tf.train.Saver(max_to_keep=2, name="checkpoint")
    path = tf.train.get_checkpoint_state(mainDirectory)
    if path is not None:
        saver.restore(sess, tf.train.latest_checkpoint(mainDirectory))

    lastTime = time()
    while tf.train.global_step(sess, globalStepTensor) <= trainingCycles:
        globalStep = tf.train.global_step(sess, globalStepTensor)
        # Generates a batch for each training cycle
        xFeed, yFeed = createTrainingBatch(batchSize)

        # Records summaries and accuracy on both the train and eval data
        if globalStep % summarySteps == 0:
            currentTime = time()
            totalTime = currentTime - lastTime
            print(str(totalTime) + " seconds, " +
                  str(summarySteps / totalTime) + " steps/sec")
            lastTime = currentTime

            summary, accuracyOut, _ = sess.run(
                [merged, accuracy, trainStep],
                feed_dict={x: xFeed, y: yFeed, internalDropout: dropout})
            trainWriter.add_summary(summary, globalStep)
            trainWriter.flush()
            print('Train accuracy at step %s: %s' % (globalStep, accuracyOut))

            # Dropout is disabled for evaluation
            summary, accuracyOut = sess.run(
                [merged, accuracy],
                feed_dict={x: evalDataX, y: evalDataY, internalDropout: 0})
            evalWriter.add_summary(summary, globalStep)
            evalWriter.flush()
            print('Eval accuracy at step %s: %s' % (globalStep, accuracyOut))
            print("\n\n")

            # Saves a snapshot of the model
            saver.save(sess, save_path=mainDirectory + "model",
                       global_step=globalStep)
        else:  # Training cycle
            _ = sess.run(
                [trainStep],
                feed_dict={x: xFeed, y: yFeed, internalDropout: dropout})

    trainWriter.close()
    evalWriter.close()

train()
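While training runs, the loss and accuracy summaries the FileWriters write to ./model_1/train and ./model_1/eval can be viewed with TensorBoard's standard invocation (TensorBoard ships with TensorFlow; nothing below is specific to this script):

tensorboard --logdir=./model_1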