@umitanuki
Created August 2, 2018 22:30
blog-mlp-scripts
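Two scripts: the first downloads daily bars through the Alpaca API, computes TA-Lib indicators, and writes train/eval CSV datasets; the second trains a small TensorFlow MLP classifier on those datasets.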
from datetime import datetime
import numpy as np
import talib
import alpaca_trade_api as tradeapi
import pandas
import time
import os
# Creates dataset folders in the directory the script is run from
# (os.makedirs with exist_ok avoids failing when only one folder exists)
os.makedirs("./train", exist_ok=True)
os.makedirs("./eval", exist_ok=True)
# api = tradeapi.REST(key_id=<your key id>, secret_key=<your secret key>)
barTimeframe = "1D" # 1Min, 5Min, 15Min, 1H, 1D
assetList = np.loadtxt(
    "assetList.txt",
    comments="#",
    delimiter=",",
    unpack=False,
    dtype="str")
# ISO8601 date format
trainStartDate = "2015-01-01T00:00:00.000Z"
trainEndDate = "2017-06-01T00:00:00.000Z"
evalStartDate = "2017-06-01T00:00:00.000Z"
evalEndDate = "2018-06-01T00:00:00.000Z"
targetLookaheadPeriod = 1
startCutoffPeriod = 50 # Set to length of maximum period indicator
# Tracks position in list of symbols to download
iteratorPos = 0
assetListLen = len(assetList)
while iteratorPos < assetListLen:
    try:
        symbol = assetList[iteratorPos]
        # Returns market data as a pandas dataframe
        returned_data = api.get_bars(
            symbol,
            barTimeframe,
            start_dt=trainStartDate,
            end_dt=evalEndDate).df
        # Processes all data into numpy arrays for use by talib
        timeList = np.array(returned_data.index)
        openList = np.array(returned_data.open, dtype=np.float64)
        highList = np.array(returned_data.high, dtype=np.float64)
        lowList = np.array(returned_data.low, dtype=np.float64)
        closeList = np.array(returned_data.close, dtype=np.float64)
        volumeList = np.array(returned_data.volume, dtype=np.float64)
        # Adjusts the data lists for the reward-function look-ahead period
        shiftedTimeList = timeList[:-targetLookaheadPeriod]
        shiftedClose = closeList[targetLookaheadPeriod:]
        highList = highList[:-targetLookaheadPeriod]
        lowList = lowList[:-targetLookaheadPeriod]
        closeList = closeList[:-targetLookaheadPeriod]
        # Calculates trading indicators
        RSI14 = talib.RSI(closeList, 14)
        RSI50 = talib.RSI(closeList, 50)
        STOCH14K, STOCH14D = talib.STOCH(
            highList, lowList, closeList,
            fastk_period=14, slowk_period=3, slowd_period=3)
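        # talib leaves NaNs in each indicator's warm-up window (e.g. the first
        # 50 values of RSI50); startCutoffPeriod trims those rows when the
        # dataframe is sliced below.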
        # Calculates the network target / reward function for training
        closeDifference = shiftedClose - closeList
        closeDifferenceLen = len(closeDifference)
        # Creates a binary output for whether the market moves up or down,
        # for use as one-hot labels
        longOutput = np.zeros(closeDifferenceLen)
        longOutput[closeDifference >= 0] = 1
        shortOutput = np.zeros(closeDifferenceLen)
        shortOutput[closeDifference < 0] = 1
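        # For example, closeDifference = [0.5, -0.2] gives
        # longOutput = [1, 0] and shortOutput = [0, 1]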
        # Constructs the dataframe and writes it to a CSV file
        outputDF = {
            # Not used in network training, kept only for later analysis
            "close": closeList,
            "RSI14": RSI14,
            "RSI50": RSI50,
            "STOCH14K": STOCH14K,
            "STOCH14D": STOCH14D,
            "longOutput": longOutput,
            "shortOutput": shortOutput
        }
        # Makes sure the dataframe columns don't get mixed up
        columnOrder = ["close", "RSI14", "RSI50", "STOCH14K",
                       "STOCH14D", "longOutput", "shortOutput"]
        outputDF = pandas.DataFrame(
            data=outputDF,
            index=shiftedTimeList,
            columns=columnOrder)[startCutoffPeriod:]
        # Splits the data into training and evaluation sets
        trainingDF = outputDF[outputDF.index < evalStartDate]
        evalDF = outputDF[outputDF.index >= evalStartDate]
        if len(trainingDF) > 0 and len(evalDF) > 0:
            print("writing " + str(symbol) +
                  ", data len: " + str(len(closeList)))
            trainingDF.to_csv("./train/" + symbol + ".csv", index_label="date")
            evalDF.to_csv("./eval/" + symbol + ".csv", index_label="date")
    except Exception as e:
        # Skips symbols whose data could not be fetched or processed
        print("skipping " + str(symbol) + ": " + str(e))
    time.sleep(5)  # To avoid API rate limits
    iteratorPos += 1
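
# ---------------------------------------------------------------------------
# Second script: trains an MLP classifier on the CSV datasets produced above.
# ---------------------------------------------------------------------------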
from time import time
from os import listdir
from os.path import isfile, join
import pandas
import numpy as np
import tensorflow as tf
# Model settings
# Static seed to allow for reproducibility between training runs
tf.set_random_seed(12345)
trainingCycles = 500000  # Number of training steps before ending
batchSize = 1000  # Number of examples per training batch
summarySteps = 1000  # Number of training steps between each summary
dropout = 0.5  # Node dropout rate for training
nodeLayout = [40, 30, 20, 10]  # Number of nodes in each hidden layer
mainDirectory = "./model_1/"
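# Summaries are written under mainDirectory, so training can be monitored
# with: tensorboard --logdir ./model_1/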
trainFiles = [f for f in listdir("./train/") if isfile(join("./train/", f))]
evalFiles = [f for f in listdir("./eval/") if isfile(join("./eval/", f))]
# Initialises data arrays
trainDataX = np.empty([0, 4])
trainDataY = np.empty([0, 2])
evalDataX = np.empty([0, 4])
evalDataY = np.empty([0, 2])
# Reads training data into memory
readPos = 0
for fileName in trainFiles:
    importedData = pandas.read_csv("./train/" + fileName, sep=',')
    xValuesDF = importedData[["RSI14", "RSI50", "STOCH14K", "STOCH14D"]]
    yValuesDF = importedData[["longOutput", "shortOutput"]]
    xValues = xValuesDF.values
    yValues = yValuesDF.values
    trainDataX = np.concatenate([trainDataX, xValues], axis=0)
    trainDataY = np.concatenate([trainDataY, yValues], axis=0)
    if readPos % 50 == 0 and readPos > 0:
        print("Loaded " + str(readPos) + " training files")
    readPos += 1
print("\n\n")
# Reads evaluation data into memory
readPos = 0
for fileName in evalFiles:
    importedData = pandas.read_csv("./eval/" + fileName, sep=',')
    xValuesDF = importedData[["RSI14", "RSI50", "STOCH14K", "STOCH14D"]]
    yValuesDF = importedData[["longOutput", "shortOutput"]]
    xValues = xValuesDF.values
    yValues = yValuesDF.values
    evalDataX = np.concatenate([evalDataX, xValues], axis=0)
    evalDataY = np.concatenate([evalDataY, yValues], axis=0)
    if readPos % 50 == 0 and readPos > 0:
        print("Loaded " + str(readPos) + " evaluation files")
    readPos += 1
print("\n\n")
# Samples a random batch of training examples
def createTrainingBatch(amount):
    randomBatchPos = np.random.randint(0, trainDataX.shape[0], amount)
    xOut = trainDataX[randomBatchPos]
    yOut = trainDataY[randomBatchPos]
    return xOut, yOut
tf.logging.set_verbosity(tf.logging.INFO)
# ML training and evaluation functions
def train():
    globalStepTensor = tf.Variable(0, trainable=False, name='global_step')
    sess = tf.InteractiveSession()
    # Placeholder for the input features
    x = tf.placeholder(tf.float32, [None, 4])
    # Placeholder for the one-hot labels
    y = tf.placeholder(tf.float32, [None, 2])
    # Placeholder for the node dropout rate
    internalDropout = tf.placeholder(tf.float32, None)
    net = x  # The input layer is the trading indicators
    # Creates the neural network model
    with tf.name_scope('network'):
        # Initialises each hidden layer, as specified by nodeLayout
        layerPos = 0
        for units in nodeLayout:
            net = tf.layers.dense(
                net,
                units=units,
                activation=tf.nn.tanh,
                name="dense" + str(units) + "_" + str(layerPos))
            # Dropout layer after each hidden layer
            net = tf.layers.dropout(net, rate=internalDropout)
            layerPos += 1
        # Network output; no softmax activation here, since
        # softmax_cross_entropy_with_logits_v2 applies softmax internally
        # (the original used activation=tf.nn.softmax, which double-applied it)
        logits = tf.layers.dense(net, 2, activation=None)
    with tf.name_scope('lossFunction'):
        cross_entropy_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=y, logits=logits))
    with tf.name_scope('trainingStep'):
        tf.summary.scalar('crossEntropyLoss', cross_entropy_loss)
        trainStep = tf.train.AdamOptimizer(0.0001).minimize(
            cross_entropy_loss, global_step=globalStepTensor)
    with tf.name_scope('accuracy'):
        correctPrediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correctPrediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()
    trainWriter = tf.summary.FileWriter(
        mainDirectory + '/train', sess.graph, flush_secs=1, max_queue=2)
    evalWriter = tf.summary.FileWriter(
        mainDirectory + '/eval', sess.graph, flush_secs=1, max_queue=2)
    tf.global_variables_initializer().run()
    # Saves the model at defined checkpoints and loads any available model
    # at start-up
    saver = tf.train.Saver(max_to_keep=2, name="checkpoint")
    path = tf.train.get_checkpoint_state(mainDirectory)
    if path is not None:
        saver.restore(sess, tf.train.latest_checkpoint(mainDirectory))
    lastTime = time()
    while tf.train.global_step(sess, globalStepTensor) <= trainingCycles:
        globalStep = tf.train.global_step(sess, globalStepTensor)
        # Generates a batch for each training cycle
        xFeed, yFeed = createTrainingBatch(batchSize)
        # Records summaries and accuracy on both train and eval data
        if globalStep % summarySteps == 0:
            currentTime = time()
            totalTime = (currentTime - lastTime)
            print(str(totalTime) + " seconds, " +
                  str(summarySteps / totalTime) + " steps/sec")
            lastTime = currentTime
            summary, accuracyOut, _ = sess.run(
                [merged, accuracy, trainStep],
                feed_dict={
                    x: xFeed,
                    y: yFeed,
                    internalDropout: dropout
                })
            trainWriter.add_summary(summary, globalStep)
            trainWriter.flush()
            print('Train accuracy at step %s: %s' % (globalStep, accuracyOut))
            summary, accuracyOut = sess.run(
                [merged, accuracy],
                feed_dict={
                    x: evalDataX,
                    y: evalDataY,
                    internalDropout: 0
                })
            evalWriter.add_summary(summary, globalStep)
            evalWriter.flush()
            print('Eval accuracy at step %s: %s' % (globalStep, accuracyOut))
            print("\n\n")
            # Saves a snapshot of the model
            saver.save(sess, save_path=mainDirectory + "model",
                       global_step=globalStep)
        else:  # Training cycle
            _ = sess.run(
                [trainStep], feed_dict={
                    x: xFeed,
                    y: yFeed,
                    internalDropout: dropout
                })
    trainWriter.close()
    evalWriter.close()
train()
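
# A minimal sketch (not part of the original scripts) of how the checkpoint
# written by train() could be reloaded for offline inference. It assumes the
# ./model_1/ layout above and rebuilds the graph from the saved .meta file:
#
#   checkpointPath = tf.train.latest_checkpoint("./model_1/")
#   restorer = tf.train.import_meta_graph(checkpointPath + ".meta")
#   with tf.Session() as sess:
#       restorer.restore(sess, checkpointPath)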