Skip to content

Instantly share code, notes, and snippets.

View StevenReitsma's full-sized avatar

Steven Reitsma StevenReitsma

View GitHub Profile
@StevenReitsma
StevenReitsma / ChangepointDetection2.py
Created January 16, 2018 16:15
Blogpost-Changepoint-Detection-Snippet2
# Fix NumPy's global random seed at 25 (presumably so that generate_data
# yields the same data on every run — confirm that it draws from np.random).
np.random.seed(25)
# Build the truck data set. generate_data is defined outside this excerpt.
# NOTE(review): the meaning of the arguments (365, 300) is not visible here —
# confirm against generate_data's signature.
df_truck = generate_data(365, 300)
# Bare expression: its value is only shown when this runs as a notebook cell
# (assumes df_truck is a pandas DataFrame — TODO confirm).
df_truck.head()
@StevenReitsma
StevenReitsma / ChangepointDetection3.py
Created January 16, 2018 16:17
Blogpost-Changepoint-Detection-Snippet3
# Target: the 'l_100km' column. Predictors: 'area', 'headwind' and 'weight_kg'.
y = df_truck['l_100km']
X = df_truck[['area', 'headwind', 'weight_kg']]

# Positional split: the first 265 rows are used for training, the rest for testing.
X_train, X_test = X[:265], X[265:]
y_train, y_test = y[:265], y[265:]
@StevenReitsma
StevenReitsma / ChangepointDetection4.py
Last active January 17, 2018 07:39
Blogpost-Changepoint-Detection-Snippet4
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion, make_union
from sklearn.ensemble import RandomForestRegressor
def get_categorical_glossary(x):
    """Map each distinct non-null value of *x* to a consecutive integer code.

    Parameters
    ----------
    x : pandas.Series-like
        Must support ``x.notnull()`` and boolean-mask indexing.

    Returns
    -------
    dict
        ``{value: code}`` where codes run from 0 to n-1 in the sorted order
        produced by ``np.unique``. Null entries never receive a code.
    """
    # Filter nulls *before* np.unique: sorting unfiltered data raises a
    # TypeError when None/NaN is mixed with strings, and the null would not
    # get a code anyway.
    observed = np.unique(x[x.notnull()])
    return {value: code for code, value in enumerate(observed)}
class ColumnSelector(BaseEstimator, TransformerMixin):
@StevenReitsma
StevenReitsma / ChangepointDetection5.py
Last active January 17, 2018 07:41
Blogpost-Changepoint-Detection-Snippet5
# Numeric variables: a pipeline whose only step selects the 'weight_kg' column.
# (ColumnSelector is declared elsewhere; its body is not visible in this excerpt.)
numeric_pipeline = make_pipeline(ColumnSelector(['weight_kg']))
# Categorical variables: select 'area' and 'headwind', then apply NumericEncoder.
# NOTE(review): NumericEncoder is not defined in this excerpt — presumably it maps
# categories to integer codes; confirm against its definition.
categorical_pipeline = make_pipeline(ColumnSelector(['area', 'headwind']), NumericEncoder())
# Combining numerical and categorical data preprocessing: make_union joins the
# two pipelines into a single preprocessing step.
preproc_union = make_union(numeric_pipeline, categorical_pipeline)
# Final pipeline (its definition is cut off in this excerpt)
@StevenReitsma
StevenReitsma / ChangepointDetection6.py
Created January 17, 2018 07:42
Blogpost-Changepoint-Detection-Snippet6
# Predict the target for the held-out rows with the fitted pipeline
# (estimation_pipeline is built outside this excerpt).
y_test_predictions = estimation_pipeline.predict(X_test)
# Residuals: observed minus predicted, so a positive value means the model
# under-predicted.
prediction_errors = y_test - y_test_predictions
@StevenReitsma
StevenReitsma / ChangepointDetection7.py
Created January 17, 2018 07:46
Blogpost-Changepoint-Detection-Snippet7
# Load packages
from rpy2.robjects import FloatVector
from rpy2.robjects.packages import importr
# Function to convert an R object to a Python dictionary
def robj_to_dict(robj):
    """Convert a named R object into a plain Python dictionary.

    ``robj`` must expose a ``names`` attribute and be iterable over its
    elements. Names and elements are paired positionally; each element is
    materialised as a Python list. If the two differ in length, the surplus
    is dropped, and a repeated name keeps its last element.

    Returns a dict mapping each name to the list built from its element.
    """
    return {name: list(element) for name, element in zip(robj.names, robj)}
@StevenReitsma
StevenReitsma / ChangepointDetection8.py
Created January 17, 2018 07:50
Blogpost-Changepoint-Detection-Snippet8
# Load the R package 'cpm' through rpy2's importr; its R functions are then
# reachable as attributes of `cpm` (e.g. cpm.detectChangePoint).
# NOTE(review): presumably requires the 'cpm' package to be installed in the
# R library that rpy2 uses — confirm in the target environment.
cpm = importr('cpm')
@StevenReitsma
StevenReitsma / ChangepointDetection9.py
Created January 17, 2018 07:53
Blogpost-Changepoint-Detection-Snippet9
# Run the R function detectChangePoint on the prediction errors, passed to R as
# a FloatVector, with cpmType='Student' and ARL0=1000.
# NOTE(review): per the cpm package docs, 'Student' targets a shift in the mean
# and ARL0 is the expected number of observations before a false alarm —
# confirm against the installed package version.
cpm_result = cpm.detectChangePoint(FloatVector(prediction_errors), cpmType='Student', ARL0=1000)
# Convert the returned R object into a Python dict keyed by its element names.
cpm_result = robj_to_dict(cpm_result)
@StevenReitsma
StevenReitsma / Blogpost-Heineken1.py
Created February 23, 2018 09:43
Blogpost-Heineken1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
@StevenReitsma
StevenReitsma / Blogpost-Heineken2.py
Created February 23, 2018 09:47
Blogpost-Heineken2
# Load the three CSV inputs. 'Date' is parsed as a datetime where requested,
# and every header is lower-cased so later code can use lower-case column names.
train = pd.read_csv('train.csv', parse_dates=['Date'])
train.columns = [label.lower() for label in train.columns]

features = pd.read_csv('features.csv', parse_dates=['Date'])
features.columns = [label.lower() for label in features.columns]
# Missing values in the five markdown columns are replaced with 0.
features[['markdown1', 'markdown2', 'markdown3', 'markdown4', 'markdown5']] = (
    features[['markdown1', 'markdown2', 'markdown3', 'markdown4', 'markdown5']]
    .fillna(0)
)

stores = pd.read_csv('stores.csv')
stores.columns = [label.lower() for label in stores.columns]
df_base = (
train
.merge(features, how='inner')