Last active
February 20, 2017 01:17
-
-
Save usmcamp0811/bc72f69e582749fc2da431f1d46107f9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import time | |
import random | |
'''
This class returns time-series windows of data. It takes a dataframe that is
already sorted in time order, where index 0 is timestep 0 and index N is timestep N.
The function window_maker steps incrementally through the entire dataframe,
returning the next window of data on each call. The function random_window_maker
returns a random window of data that is time-continuous. All outputs are numpy arrays.
'''
class time_series_windows(object):
    """Serve fixed-size, time-contiguous windows from a time-ordered dataframe.

    Rows are addressed by position (row 0 is the first timestep), so the
    dataframe's index labels do not matter. Every window is returned as a
    flattened 1-D numpy array of ``window_size * n_columns`` values.

    Attributes written by the methods:
        last_rec_start: position of the first row of the next sequential window.
        row_stop: exclusive end position of the next sequential window.
        batch_counter: number of sequential windows served since the last wrap.
        batch_df: rows from the most recent batch start to the end of the frame.
        window_batch: the most recent batch returned by ``make_batch``.
    """

    def __init__(self, dataframe, window_size):
        """Store the dataframe and place the window cursor at row 0.

        Args:
            dataframe: pandas DataFrame already sorted in time order.
            window_size: number of consecutive rows in each window.
        """
        self.df = dataframe
        self.last_rec_start = 0
        self.window_size = window_size
        self.row_stop = self.last_rec_start + self.window_size
        self.batch = np.zeros((0, window_size * dataframe.shape[1]))
        self.batch_counter = 0

    def _require_window_fits(self):
        """Raise ValueError when a single window is longer than the dataframe."""
        n_rows = self.df.shape[0]
        if self.window_size > n_rows:
            raise ValueError(
                "window_size (%d) exceeds the number of rows (%d)"
                % (self.window_size, n_rows)
            )

    def _current_window(self):
        """Return the rows in [last_rec_start, row_stop) as a flat array."""
        # Positional slicing keeps windows correct for any index labels;
        # to_numpy() replaces DataFrame.as_matrix(), removed in pandas 1.0.
        rows = self.df.iloc[self.last_rec_start:self.row_stop]
        return rows.to_numpy().flatten()

    def window_maker(self):
        """Return the next sequential window and advance the cursor one row.

        Once the window would run past the last row, the cursor wraps back to
        row 0 and ``batch_counter`` restarts.

        Returns:
            1-D numpy array of ``window_size * n_columns`` values.

        Raises:
            ValueError: if ``window_size`` is larger than the dataframe.
        """
        # Without this guard the wrap-around below could never succeed.
        self._require_window_fits()
        if self.row_stop > self.df.shape[0]:
            self.batch_counter = 0
            self.last_rec_start = 0
            self.row_stop = self.last_rec_start + self.window_size
        record = self._current_window()
        self.batch_counter += 1
        self.last_rec_start += 1
        self.row_stop += 1
        return record

    def make_batch(self, batch_size):
        """Return ``batch_size`` consecutive windows from a random start row.

        The sequential cursor is moved to the randomly chosen start, so the
        rows of the batch are successive one-step-shifted windows. Windows
        that would run past the end of the dataframe wrap to row 0, exactly
        as in ``window_maker``.

        Args:
            batch_size: number of windows (rows) in the returned batch.

        Returns:
            2-D numpy array of shape ``(batch_size, window_size * n_columns)``.

        Raises:
            ValueError: if ``batch_size`` is not smaller than the number of
                rows, or if ``window_size`` is larger than the dataframe.
        """
        n_rows = self.df.shape[0]
        width = self.df.shape[1] * self.window_size
        if batch_size >= n_rows:
            raise ValueError(
                "batch_size (%d) must be smaller than the number of rows (%d)"
                % (batch_size, n_rows)
            )
        batch_start_point = random.randrange(0, n_rows - batch_size)
        self.batch_df = self.df.iloc[batch_start_point:, :].reset_index(drop=True)
        # Seat the cursor on the random start so the batch actually begins there.
        self.last_rec_start = batch_start_point
        self.row_stop = batch_start_point + self.window_size
        windows = [
            self.window_maker().reshape((1, width)) for _ in range(batch_size)
        ]
        # The empty float seed keeps the result 2-D (and at least float) even
        # for an empty batch; one concatenate avoids repeated array copies.
        seed = np.empty((0, width), float)
        self.window_batch = np.concatenate([seed] + windows, axis=0)
        return self.window_batch

    def random_window_maker(self):
        """Return one window starting at a uniformly random valid row.

        The cursor is left on the chosen window (it is not advanced).

        Returns:
            1-D numpy array of ``window_size * n_columns`` values.

        Raises:
            ValueError: if ``window_size`` is larger than the dataframe.
        """
        self._require_window_fits()
        # Draw only from starts where the whole window fits, so no retry loop.
        random_start = random.randrange(0, self.df.shape[0] - self.window_size + 1)
        self.last_rec_start = random_start
        self.row_stop = self.last_rec_start + self.window_size
        return self._current_window()
if __name__ == "__main__":
    # Demo: two numeric feature columns plus a string 'target' column, 80 rows.
    df = pd.DataFrame(np.arange(0.0, 160.0).reshape((80, 2)))
    df['target'] = ['target' + str(i) for i in range(80)]
    window_size = 3
    win = time_series_windows(df, window_size)
    for i in range(10):
        data = win.window_maker()
        # The last flattened value is the 'target' cell of the window's final
        # row; everything before it is treated as the input features.
        X = data[:-1]
        y = data[-1]
        print(i, 'X out:', X)
        print(i, 'y out:', y)
    print('out:', win.random_window_maker())
    print(win.make_batch(8))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment