from __future__ import absolute_import, division, print_function
import traceback
from math import sqrt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import Sequential, optimizers, metrics
from keras.layers import LSTM, Dropout, Dense
from keras.losses import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
# fix random seed for reproducibility
np.random.seed(7)
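# The globals below are used by the functions that follow but were never
# defined in this gist; the values here are assumed placeholders, tune as needed.
look_back_step = 50    # rows of history in each window (assumption)
num_unrolling = 5      # rows to unroll/predict per window (assumption)
features = 4           # open, high, low, close
max_file_process = -1  # cap on files read by create_dataset(); -1 = no cap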
all_dataset = []
def preprocessing(file):
    """Read one OHLC CSV and flatten it into overlapping training windows."""
    _data = []
    print('Processing file', file)
    try:
        with open(file, 'r') as f:
            df = pd.read_csv(f, delimiter=',', usecols=['date', 'open', 'high', 'low', 'close'])
        df = df.sort_values('date')
        df = df.drop(['date'], axis=1)
        dataset = df.values
        window = num_unrolling + look_back_step
        if dataset.shape[0] >= window:
            # slide a window of look_back_step + num_unrolling rows over the
            # series, appending every row of every window one after another
            for set_i in range(dataset.shape[0] - window):
                _data.extend(dataset[set_i:set_i + window])
        return np.array(_data)
    except Exception:
        traceback.print_exc()
        return None
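# A quick shape check for preprocessing() (a sketch: the CSV path and the
# random column values below are made up for illustration). A file with R
# rows yields (R - window) overlapping windows, flattened row by row into a
# ((R - window) * window, 4) array.
def _demo_preprocessing():
    window = num_unrolling + look_back_step
    demo = pd.DataFrame({
        'date': pd.date_range('2018-01-01', periods=100).astype(str),
        'open': np.random.rand(100), 'high': np.random.rand(100),
        'low': np.random.rand(100), 'close': np.random.rand(100),
    })
    demo.to_csv('demo.csv', index=False)
    out = preprocessing('demo.csv')
    assert out.shape == ((100 - window) * window, 4)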
def scale_data(train_data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_data = train_data.reshape(-1, features)
    scaler.fit(train_data)
    train_data = scaler.transform(train_data)
    return scaler, train_data
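# Sanity check for scale_data(): the scaler fit on the training data must be
# reused (not refit) to transform test/eval data and to invert predictions
# back to price space. A minimal sketch with stand-in data:
def _demo_scaling():
    sample = np.random.rand(10, features)        # stand-in OHLC rows
    scaler, scaled = scale_data(sample)
    restored = scaler.inverse_transform(scaled)  # undo the (0, 1) scaling
    assert np.allclose(sample, restored)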
def create_dataset(files):
    """Combine one or more CSV files into train/test/eval splits."""
    global all_dataset
    for i, f in enumerate(files):
        _data = preprocessing(f)
        if _data is not None and len(_data) > 0:
            if len(all_dataset) == 0:
                all_dataset = _data
            else:
                all_dataset = np.concatenate((all_dataset, _data), axis=0)
        if max_file_process != -1 and i + 1 >= max_file_process:
            break  # stop once max_file_process files have been read
    dataset = np.array(all_dataset)
    # split on window boundaries: each window spans
    # (look_back_step + num_unrolling) consecutive rows
    window = look_back_step + num_unrolling
    batch_num = dataset.shape[0] // window
    train_size = int(batch_num * 0.7)
    test_size = int(batch_num * 0.2)
    train_data = dataset[:train_size * window]
    test_data = dataset[train_size * window:(train_size + test_size) * window]
    eval_data = dataset[(train_size + test_size) * window:batch_num * window]
    return train_data, test_data, eval_data
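# End-to-end usage sketch (the CSV paths are placeholders, not from the
# original gist): build the splits, then reshape each flat split into the
# (samples, timesteps, features) layout the imported LSTM layer expects,
# where timesteps = look_back_step + num_unrolling.
def to_windows(flat):
    window = look_back_step + num_unrolling
    return flat.reshape(-1, window, features)

if __name__ == '__main__':
    train_data, test_data, eval_data = create_dataset(['EURUSD.csv', 'GBPUSD.csv'])
    print('train windows:', to_windows(train_data).shape)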