This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import warnings

# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# NOTE(review): the result is unpacked into two names, so this presumably
# relies on a cross_val_predict variant that returns
# (predictions, test_indices) rather than scikit-learn's stock function,
# which returns predictions only -- confirm which one is in scope.
predictions, test_indices = cross_val_predict(
    multi, X_pd_num, y=y, cv=multiple_ts_split, n_jobs=-1
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Wrap the raw predictions in a DataFrame and name its two columns.
pred_lgb = pd.DataFrame(predictions)
pred_lgb.columns = ['pred_lgb1', 'pred_lgb2']

# Work on a copy with a fresh 0..n-1 index so that `test_indices` can be used
# as row labels with .loc below.
# NOTE(review): assumes test_indices are positional row numbers -- confirm.
df_pred_fc = df_features.copy().reset_index(drop=True)
for col in pred_lgb.columns:
    # Start with NaN everywhere; only the rows in test_indices get a value.
    df_pred_fc[col] = np.nan
    df_pred_fc.loc[test_indices, col] = pred_lgb[col].values

# dropna() removes every row that still has a NaN in any column.
validate = df_pred_fc.dropna()

# Pick one distinct (store, dept) pair at random. No random_state is given,
# so the choice differs between runs.
selected = validate[['store', 'dept']].drop_duplicates().sample(1)
( | |
validate |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Restrict `validate` to the selected (store, dept) pair and plot two of its
# columns against the date. merge() without `on` joins on the columns the two
# frames have in common.
(
    validate
    .merge(selected, how='inner')
    .set_index('date')
    [['pred_lgb2', 'weekly_sales2']]
    .plot(figsize=(10, 4))
)
# The one-row `selected` frame itself is passed as the title.
plt.title(selected.reset_index(drop=True))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection._split import check_cv | |
from sklearn.model_selection._validation import _fit_and_predict | |
from sklearn.externals.joblib import Parallel, delayed, logger | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.base import is_classifier, clone | |
from sklearn.utils import indexable | |
import scipy.sparse as sp | |
def cross_val_predict(estimator, X, y=None, groups=None, cv=None, n_jobs=1, | |
verbose=0, fit_params=None, pre_dispatch='2*n_jobs', | |
method='predict'): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.pipeline import Pipeline, FeatureUnion | |
from sklearn.preprocessing import OneHotEncoder, StandardScaler | |
transformer = Pipeline([ | |
('features', FeatureUnion(n_jobs=1, transformer_list=[ | |
# Part 1 | |
('boolean', Pipeline([ | |
('selector', TypeSelector('bool')), | |
])), # booleans close |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example dataframe with different data types.
df = pd.DataFrame({
    'boolean_column': [True, False, True, False],
    'integer_column': [1, 2, 3, 4],
    'float_column': [1., 2., 3., 4.],
})

# Selecting booleans
boolean_columns = df.select_dtypes(include=['bool'])

# Selecting numericals (np.number matches integer and float columns, not bool)
numerical_columns = df.select_dtypes(include=[np.number])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import BaseEstimator, TransformerMixin | |
class TypeSelector(BaseEstimator, TransformerMixin):
    """Transformer that keeps only the DataFrame columns of one dtype.

    Parameters
    ----------
    dtype
        Dtype specification forwarded to
        ``DataFrame.select_dtypes(include=[dtype])``.
    """

    def __init__(self, dtype):
        self.dtype = dtype

    def fit(self, X, y=None):
        """Do nothing and return ``self``; there is nothing to learn."""
        return self

    def transform(self, X):
        """Return the columns of ``X`` whose dtype matches ``self.dtype``.

        Raises
        ------
        AssertionError
            If ``X`` is not a pandas DataFrame. The check is an ``assert``
            statement, so it is skipped when Python runs with ``-O``.
        """
        assert isinstance(X, pd.DataFrame), \
            "TypeSelector.transform expects a pandas DataFrame"
        return X.select_dtypes(include=[self.dtype])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Transforming a column into a category type
df = pd.DataFrame({'eye_color': ['green', np.nan, 'blue', 'brown']})
df['eye_color'] = df['eye_color'].astype('category')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inspecting the category codes: each value's integer position in the
# categories index. pandas uses the code -1 for missing values.
df['eye_color'].cat.codes

# Inspecting the categories
df['eye_color'].cat.categories
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class StringIndexer(BaseEstimator, TransformerMixin):
    """Transformer that replaces categorical columns by their integer codes.

    Every column of the input is accessed through the ``.cat`` accessor, so
    all columns must already have a categorical dtype.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self``; there is nothing to learn."""
        return self

    def transform(self, X):
        """Return a DataFrame holding the category codes of each column.

        The code ``-1`` is replaced by the number of categories of that
        column, i.e. the first integer after the last real category code.

        Raises
        ------
        AssertionError
            If ``X`` is not a pandas DataFrame. The check is an ``assert``
            statement, so it is skipped when Python runs with ``-O``.
        """
        assert isinstance(X, pd.DataFrame), \
            "StringIndexer.transform expects a pandas DataFrame"
        return X.apply(lambda s: s.cat.codes.replace(
            {-1: len(s.cat.categories)}
        ))