This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Seed NumPy's global random generator so repeated runs use the same seed.
np.random.seed(25)

# generate_data is defined elsewhere; presumably it draws from NumPy's global
# RNG and the arguments control the size of the dataset -- TODO confirm.
df_truck = generate_data(365, 300)

# Preview the first rows (displayed when this is the last expression of a
# notebook cell).
df_truck.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Target: the 'l_100km' column; features: the three remaining model inputs.
y = df_truck['l_100km']
X = df_truck[['area', 'headwind', 'weight_kg']]

# Positional (unshuffled) split: the first 265 rows are used for training and
# everything after them for testing.
X_train, X_test = X[:265], X[265:]
y_train, y_test = y[:265], y[265:]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import BaseEstimator, TransformerMixin | |
from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion, make_union | |
from sklearn.ensemble import RandomForestRegressor | |
def get_categorical_glossary(x):
    """Map each distinct non-null value of *x* to an integer code.

    Codes are assigned as 0, 1, 2, ... following the sorted order returned by
    ``np.unique``. Null entries (``None`` / NaN) are excluded and receive no
    code.

    Args:
        x: A pandas Series (anything supporting ``.notnull()`` and boolean
            mask indexing).

    Returns:
        dict: ``{value: code}`` for every distinct non-null value in *x*;
        empty when *x* has no non-null values.
    """
    # Compute the unique values once, from the null-filtered data. Building
    # the keys from the unfiltered data would let null entries into the
    # sort, which raises TypeError for object columns mixing strings with
    # None/NaN.
    unique_values = np.unique(x[x.notnull()])
    return {value: code for code, value in enumerate(unique_values)}
class ColumnSelector(BaseEstimator, TransformerMixin): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Numeric variables: select 'weight_kg' only (ColumnSelector is defined
# elsewhere; presumably it returns just the named columns -- TODO confirm).
numeric_pipeline = make_pipeline(ColumnSelector(['weight_kg']))

# Categorical variables: select 'area' and 'headwind', then pass them through
# NumericEncoder (defined elsewhere; presumably it replaces categories with
# numeric codes -- TODO confirm).
categorical_pipeline = make_pipeline(
    ColumnSelector(['area', 'headwind']),
    NumericEncoder(),
)

# Combining numerical and categorical data preprocessing: the union
# concatenates the outputs in the order given (numeric first, then
# categorical).
preproc_union = make_union(numeric_pipeline, categorical_pipeline)
# Final pipeline |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predict on the held-out rows with the pipeline built elsewhere.
y_test_predictions = estimation_pipeline.predict(X_test)

# Signed residuals (actual minus predicted): positive values mean the
# prediction was too low.
prediction_errors = y_test - y_test_predictions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load packages | |
from rpy2.robjects import FloatVector | |
from rpy2.robjects.packages import importr | |
# Function to convert an R object to a Python dictionary
def robj_to_dict(robj):
    """Convert a named R object to a Python dictionary.

    Args:
        robj: An iterable object exposing a ``names`` attribute, where
            ``names`` lines up positionally with the elements produced by
            iterating *robj* (e.g. a named R list returned through rpy2).

    Returns:
        dict: ``{name: list(element)}`` for each name/element pair. Pairing
        stops at the shorter of ``robj.names`` and *robj*; if a name repeats,
        the last element with that name wins.
    """
    return {name: list(values) for name, values in zip(robj.names, robj)}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the R package 'cpm' through rpy2 (the package must already be
# installed in the R library that rpy2 uses).
cpm = importr('cpm')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run cpm's detectChangePoint on the prediction errors, passed to R as a
# numeric vector, with cpmType='Student' and ARL0=1000.
cpm_result = cpm.detectChangePoint(
    FloatVector(prediction_errors),
    cpmType='Student',
    ARL0=1000,
)

# Convert the returned R object into a plain dict of Python lists.
cpm_result = robj_to_dict(cpm_result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
%matplotlib inline |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load train.csv, parsing 'Date' as datetimes, and lower-case the column
# names so all three tables use the same naming style.
train = pd.read_csv('train.csv', parse_dates=['Date'])
train.columns = train.columns.str.lower()

# Load features.csv the same way.
features = pd.read_csv('features.csv', parse_dates=['Date'])
features.columns = features.columns.str.lower()

# Missing markdown values are replaced with 0.
markdown_cols = ['markdown1', 'markdown2', 'markdown3', 'markdown4', 'markdown5']
features[markdown_cols] = features[markdown_cols].fillna(0)

# Load stores.csv (no date column to parse).
stores = pd.read_csv('stores.csv')
stores.columns = stores.columns.str.lower()
df_base = ( | |
train | |
.merge(features, how='inner') |
OlderNewer