Skip to content

Instantly share code, notes, and snippets.

@usmcamp0811
Last active February 20, 2017 01:17
Show Gist options
  • Save usmcamp0811/bc72f69e582749fc2da431f1d46107f9 to your computer and use it in GitHub Desktop.
Save usmcamp0811/bc72f69e582749fc2da431f1d46107f9 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import time
import random
'''
This module provides a class that serves fixed-size, time-contiguous windows of data
taken from a DataFrame. The DataFrame must already be sorted in time order, so that
row 0 is timestep 0 and row N is timestep N. The method window_maker steps through the
DataFrame one row at a time, returning the next window on each call and starting over
from the first row once the end is reached. The method random_window_maker returns a
window that starts at a random row. Each window is flattened into a single vector, and
all outputs are numpy arrays.
'''
class time_series_windows(object):
    """Serve fixed-size, time-contiguous windows of rows from a DataFrame.

    The DataFrame is expected to be sorted so that row 0 is the first
    timestep. Every window is ``window_size`` consecutive rows flattened
    row by row into a 1-D numpy array of length
    ``window_size * dataframe.shape[1]``.

    Attributes set by the constructor:
        df: the source DataFrame.
        window_size: number of rows per window.
        last_rec_start: positional row at which the next window starts.
        row_stop: positional row just past the end of the next window.
        batch: empty ``(0, window_size * n_columns)`` array (kept for callers).
        batch_counter: number of windows served since the last wrap-around.
    """

    def __init__(self, dataframe, window_size):
        self.df = dataframe
        self.last_rec_start = 0
        self.window_size = window_size
        self.row_stop = self.last_rec_start + self.window_size
        self.batch = np.zeros((0, window_size * dataframe.shape[1]))
        self.batch_counter = 0

    def _require_window_fits(self):
        """Raise ValueError when no complete window can be cut from the data."""
        n_rows = self.df.shape[0]
        if self.window_size > n_rows:
            raise ValueError(
                'window_size (%d) is larger than the number of rows (%d)'
                % (self.window_size, n_rows))

    def _slice_window(self):
        """Return rows [last_rec_start, row_stop) flattened into a 1-D array."""
        # Positional slicing keeps this correct for any index, not only the
        # default 0..N-1 one; ``.values`` replaces the removed ``as_matrix()``.
        return self.df.iloc[self.last_rec_start:self.row_stop].values.flatten()

    def window_maker(self):
        """Return the next sequential window and advance the cursor by one row.

        Once the window would run past the last row, the cursor (and
        ``batch_counter``) is reset and the first window is returned again.

        Returns:
            1-D numpy array of length ``window_size * n_columns``.

        Raises:
            ValueError: if ``window_size`` exceeds the number of rows.
        """
        self._require_window_fits()
        if self.row_stop > self.df.shape[0]:
            # Ran off the end of the data: start over from the first row.
            self.batch_counter = 0
            self.last_rec_start = 0
            self.row_stop = self.last_rec_start + self.window_size
        record = self._slice_window()
        self.batch_counter += 1
        self.last_rec_start += 1
        self.row_stop += 1
        return record

    def make_batch(self, batch_size):
        """Return ``batch_size`` consecutive windows stacked as rows.

        The windows come from ``window_maker``, so they continue from the
        current cursor position and wrap around at the end of the data.

        Returns:
            2-D numpy array of shape ``(batch_size, window_size * n_columns)``;
            also stored on ``self.window_batch``.

        Raises:
            ValueError: if ``batch_size`` is not smaller than the number of
                rows (raised by ``random.randrange``), or if ``window_size``
                exceeds the number of rows.
        """
        width = self.df.shape[1] * self.window_size
        # NOTE(review): ``batch_df`` is a random tail of the data, but the
        # windows below are still read from ``self.df`` at the sequential
        # cursor. Kept as-is so the returned batch and the attribute do not
        # change for existing callers -- confirm the intended behaviour.
        batch_start_point = random.randrange(0, self.df.shape[0] - batch_size)
        self.batch_df = self.df.iloc[batch_start_point:, :].reset_index(drop=True)
        rows = [self.window_maker().reshape((1, width))
                for _ in range(batch_size)]
        # Leading with the empty float array keeps the dtype promotion of the
        # previous append-based implementation; one concatenate avoids
        # re-copying the growing batch on every iteration.
        self.window_batch = np.concatenate(
            [np.empty((0, width), float)] + rows, axis=0)
        return self.window_batch

    def random_window_maker(self):
        """Return a window that starts at a uniformly random valid row.

        The sequential cursor (``last_rec_start`` / ``row_stop``) is moved to
        the chosen window, so a following ``window_maker`` call returns the
        same window and continues from there.

        Returns:
            1-D numpy array of length ``window_size * n_columns``.

        Raises:
            ValueError: if ``window_size`` exceeds the number of rows.
        """
        self._require_window_fits()
        # Draw directly from the starts that leave room for a full window
        # instead of retrying until one happens to fit.
        random_start = random.randrange(
            0, self.df.shape[0] - self.window_size + 1)
        self.last_rec_start = random_start
        self.row_stop = self.last_rec_start + self.window_size
        return self._slice_window()
if __name__ == "__main__":
    # Demo: 80 timesteps with two numeric columns plus a string 'target' column.
    df = pd.DataFrame(np.arange(0.0, 160.0).reshape((80, 2)))
    df['target'] = ['target' + str(i) for i in range(80)]
    # print(df.shape)
    window_size = 3
    win = time_series_windows(df, window_size)
    for i in range(10):
        data = win.window_maker()
        # The last flattened value is the final row's 'target'; everything
        # before it is treated as the input features.
        X = data[:-1]
        y = data[-1]
        print(i, 'X out:', X)
        # Label corrected: this line prints y, not X.
        print(i, 'y out:', y)
    print('out:', win.random_window_maker())
    print(win.make_batch(8))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment