# data standardization with sklearn
from sklearn.preprocessing import StandardScaler

# Work on copies so the original X_train / X_test stay unscaled and can be
# compared against the standardized versions later.
# NOTE(review): X_train and X_test must already be defined before this
# snippet runs — presumably pandas DataFrames produced by an earlier
# train/test split; confirm against the surrounding notebook/script.
X_train_stand = X_train.copy()
X_test_stand = X_test.copy()

# numerical features
num_cols = [
    'Item_Weight',
    'Item_Visibility',
    'Item_MRP',
    'Outlet_Establishment_Year',
]

# apply standardization on numerical features, one column at a time
for i in num_cols:
    # fit on the training data column only; the test column below is
    # transformed with this same fitted scaler rather than being refit
    scale = StandardScaler().fit(X_train_stand[[i]])

    # transform the training data column
    X_train_stand[i] = scale.transform(X_train_stand[[i]])

    # transform the testing data column
    X_test_stand[i] = scale.transform(X_test_stand[[i]])
This comment has been minimized.
This comment has been minimized.
Hey. I made a copy of X_train as X_train_stand before standardizing it, so the two can be compared later. However, that line was missing from the gist, so I have just added it in.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
Hi, for L14, where did you define X_train_stand before?
I only have X_train and X_test after splitting my df X.