Skip to content

Instantly share code, notes, and snippets.

View StevenReitsma's full-sized avatar

Steven Reitsma StevenReitsma

View GitHub Profile
@StevenReitsma
StevenReitsma / Blogpost-Heineken12.py
Created February 23, 2018 11:58
Blogpost-Heineken12
import warnings

# Suppress every warning from here on so the cross-validation run stays quiet.
warnings.filterwarnings(action='ignore')

# The result is unpacked into two values: the predictions and the indices of
# the rows they belong to.
# NOTE(review): this presumably relies on a customised cross_val_predict that
# returns a pair rather than scikit-learn's stock function — confirm.
predictions, test_indices = cross_val_predict(
    multi,
    X_pd_num,
    y=y,
    cv=multiple_ts_split,
    n_jobs=-1,
)
@StevenReitsma
StevenReitsma / Blogpost-Heineken13.py
Created February 23, 2018 11:59
Blogpost-Heineken13
# Wrap the raw predictions in a frame and give its two columns readable names.
pred_lgb = pd.DataFrame(predictions)
pred_lgb.columns = ['pred_lgb1', 'pred_lgb2']

# Work on a copy of the features with a fresh 0..n-1 index, so that
# test_indices can be used as row labels below.
features_copy = df_features.copy()
df_pred_fc = features_copy.reset_index(drop=True)

for col in pred_lgb.columns:
    # Start with an all-NaN column, then fill in only the rows that were
    # actually predicted.
    df_pred_fc[col] = np.nan
    df_pred_fc.loc[test_indices, col] = pred_lgb[col].values

# dropna removes every row containing a NaN, which includes the rows that
# never received a prediction.
validate = df_pred_fc.dropna()

# Draw one random (store, dept) combination from the validated rows.
store_dept_pairs = validate[['store', 'dept']].drop_duplicates()
selected = store_dept_pairs.sample(1)
(
validate
@StevenReitsma
StevenReitsma / Blogpost-Heineken14.py
Created February 23, 2018 12:00
Blogpost-Heineken14
# Restrict the validation rows to those matching `selected`; with no explicit
# key the inner merge joins on the columns the two frames have in common.
matching_rows = validate.merge(selected, how='inner')

# Use the date as the x-axis and plot the prediction next to the actual value.
by_date = matching_rows.set_index('date')
by_date[['pred_lgb2', 'weekly_sales2']].plot(figsize=(10, 4))

# Label the figure with the selected row(s), re-indexed from zero.
plt.title(selected.reset_index(drop=True))
@StevenReitsma
StevenReitsma / Blogpost-Heineken15.py
Created February 23, 2018 12:01
Blogpost-Heineken15
from sklearn.model_selection._split import check_cv
from sklearn.model_selection._validation import _fit_and_predict
from sklearn.externals.joblib import Parallel, delayed, logger
from sklearn.preprocessing import LabelEncoder
from sklearn.base import is_classifier, clone
from sklearn.utils import indexable
import scipy.sparse as sp
def cross_val_predict(estimator, X, y=None, groups=None, cv=None, n_jobs=1,
verbose=0, fit_params=None, pre_dispatch='2*n_jobs',
method='predict'):
@StevenReitsma
StevenReitsma / Blogpost-Pandas1.py
Created February 23, 2018 12:14
Blogpost-Pandas1
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import OneHotEncoder, StandardScaler
transformer = Pipeline([
('features', FeatureUnion(n_jobs=1, transformer_list=[
# Part 1
('boolean', Pipeline([
('selector', TypeSelector('bool')),
])), # booleans close
@StevenReitsma
StevenReitsma / Blogpost-Pandas2.py
Created February 23, 2018 12:18
Blogpost-Pandas2
# Small demonstration frame with one column per data type of interest.
df = pd.DataFrame(
    {
        'boolean_column': [True, False, True, False],
        'integer_column': [1, 2, 3, 4],
        'float_column': [1., 2., 3., 4.],
    }
)

# Keep only the boolean-typed columns.
boolean_columns = df.select_dtypes(include='bool')

# Keep only the numeric columns (the integer and float ones).
numerical_columns = df.select_dtypes(include=np.number)
@StevenReitsma
StevenReitsma / Blogpost-Pandas3.py
Created February 23, 2018 12:19
Blogpost-Pandas3
from sklearn.base import BaseEstimator, TransformerMixin
class TypeSelector(BaseEstimator, TransformerMixin):
    """Transformer that keeps only the DataFrame columns of a given dtype.

    Parameters
    ----------
    dtype
        Dtype specification; it is passed to ``DataFrame.select_dtypes`` as
        the single entry of its ``include`` list.
    """

    def __init__(self, dtype):
        self.dtype = dtype

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is learned from the data."""
        return self

    def transform(self, X):
        """Return the columns of ``X`` selected by ``self.dtype``.

        Raises
        ------
        TypeError
            If ``X`` is not a pandas DataFrame.
        """
        # Explicit check instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would silently disable the validation.
        if not isinstance(X, pd.DataFrame):
            raise TypeError(
                "TypeSelector expects a pandas DataFrame, got {}".format(
                    type(X).__name__
                )
            )
        return X.select_dtypes(include=[self.dtype])
@StevenReitsma
StevenReitsma / Blogpost-Pandas4.py
Created February 23, 2018 12:21
Blogpost-Pandas4
# Build a one-column frame of eye colors (with one missing value) and convert
# that column to the pandas category dtype.
df = pd.DataFrame({'eye_color': ['green', np.nan, 'blue', 'brown']})
df = df.astype({'eye_color': 'category'})
@StevenReitsma
StevenReitsma / Blogpost-Pandas5.py
Created February 23, 2018 12:22
Blogpost-Pandas5
# Look at the integer code stored for each row of the categorical column.
df.eye_color.cat.codes

# Look at the category labels of the column.
df.eye_color.cat.categories
@StevenReitsma
StevenReitsma / Blogpost-Pandas6.py
Created February 23, 2018 12:23
Blogpost-Pandas6
class StringIndexer(BaseEstimator, TransformerMixin):
    """Transformer that replaces categorical columns by their integer codes.

    Every column of the input is expected to have the pandas category dtype,
    because the ``.cat`` accessor is used on each of them. The code ``-1``
    (which pandas uses for missing values) is remapped to
    ``len(categories)``, i.e. one past the largest regular code.
    """

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is learned from the data."""
        return self

    def transform(self, X):
        """Return a frame holding the category codes of every column of ``X``.

        Raises
        ------
        TypeError
            If ``X`` is not a pandas DataFrame.
        """
        # Explicit check instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would silently disable the validation.
        if not isinstance(X, pd.DataFrame):
            raise TypeError(
                "StringIndexer expects a pandas DataFrame, got {}".format(
                    type(X).__name__
                )
            )

        def _to_codes(column):
            # Move the -1 code to the first index after the real categories.
            return column.cat.codes.replace({-1: len(column.cat.categories)})

        return X.apply(_to_codes)