Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save AlainOUYANG/5a76474da5ced28a6e3e42f29baa03fa to your computer and use it in GitHub Desktop.
Save AlainOUYANG/5a76474da5ced28a6e3e42f29baa03fa to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
def _make_windows(data, n_feats, win_len, fh):
    """Cut a 2-D array into flattened input windows and target horizons.

    Each sample ``i`` is built from rows ``i .. i + win_len + fh - 1``:

    * input: column 0 of the first ``win_len - 1`` rows, followed by every
      column of row ``i + win_len - 1`` (``win_len + n_feats - 1`` values);
    * target: column 0 of the next ``fh`` rows.

    Args:
        data (np.ndarray): Array of shape ``(n_rows, n_feats)``.
        n_feats (int): Number of columns in ``data``.
        win_len (int): Input window length.
        fh (int): Forecasting horizon.

    Returns:
        tuple: ``(X, y)`` float arrays of shape
        ``(n_rows - win_len - fh, win_len + n_feats - 1)`` and
        ``(n_rows - win_len - fh, fh)``.
    """
    # NOTE: this sample count leaves out the last window that would still fit
    # (its target would end on the final row). Kept as-is so the output
    # shapes stay what existing callers expect.
    n_samples = len(data) - win_len - fh
    starts = np.arange(n_samples)[:, None]

    X = np.zeros([n_samples, win_len + n_feats - 1])
    y = np.zeros([n_samples, fh])

    # History part: only column 0 for the first win_len - 1 steps.
    X[:, :win_len - 1] = data[starts + np.arange(win_len - 1), 0]
    # Last step of each window: all columns of that row.
    X[:, win_len - 1:] = data[win_len - 1:win_len - 1 + n_samples, :]
    # Targets: column 0 for the fh steps right after the window.
    y[:] = data[starts + win_len + np.arange(fh), 0]
    return X, y


def preprocessing(df, n_feats, train_len, win_len, fh):
    """
    Split, standardize and window a DataFrame into training and test samples.

    The first ``train_len`` rows form the training split. The test split
    starts ``win_len`` rows before ``train_len`` so that the first test
    window takes its inputs from the end of the training rows. The scaler is
    fitted on the training split only and then applied to both splits.

    Args:
        df (pd.DataFrame): The DataFrame that contains the data, with the
            target in the first column.
        n_feats (int): Number of features; must equal the number of columns
            of ``df`` (target included).
        train_len (int): Training set length (number of leading rows).
        win_len (int): Input window length (at least 1).
        fh (int): Forecasting horizon (non-negative).

    Returns:
        tuple: ``(X_train_batches, y_train_batches, X_test_batches,
        y_test_batches, normalizer)``. The feature arrays have
        ``win_len + n_feats - 1`` columns, the target arrays have ``fh``
        columns, and ``normalizer`` is the fitted ``StandardScaler``.
        Targets are in standardized units.

    Raises:
        ValueError: If the arguments are inconsistent with each other or
            with the shape of ``df``.
    """
    # Validate up front: otherwise these cases surface as opaque numpy
    # shape errors, or (for a mismatched n_feats) may silently broadcast.
    if win_len < 1:
        raise ValueError(f"win_len must be at least 1, got {win_len}")
    if fh < 0:
        raise ValueError(f"fh must be non-negative, got {fh}")
    if n_feats != df.shape[1]:
        raise ValueError(
            f"n_feats ({n_feats}) must equal the number of columns in df "
            f"({df.shape[1]})"
        )
    if train_len < win_len + fh:
        raise ValueError(
            f"train_len ({train_len}) must be at least win_len + fh "
            f"({win_len + fh})"
        )
    if train_len + fh > len(df):
        raise ValueError(
            f"train_len + fh ({train_len + fh}) exceeds the number of rows "
            f"in df ({len(df)})"
        )

    # Split dataset
    train = df.iloc[:train_len, :].values
    test = df.iloc[train_len - win_len:, :].values

    # Normalization: statistics come from the training rows only
    normalizer = StandardScaler()
    train = normalizer.fit_transform(train)
    test = normalizer.transform(test)

    # Windowing
    X_train_batches, y_train_batches = _make_windows(train, n_feats, win_len, fh)
    X_test_batches, y_test_batches = _make_windows(test, n_feats, win_len, fh)
    return X_train_batches, y_train_batches, X_test_batches, y_test_batches, normalizer
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment