Steven Reitsma StevenReitsma

## ChangepointDetection2.py
# Seed NumPy's global random number generator so later NumPy random draws
# are reproducible.
np.random.seed(25)
# Build the data set. generate_data is defined outside this snippet, so the
# meaning of its two arguments (365, 300) cannot be confirmed from here.
df_truck = generate_data(365, 300)
# Preview the result (assumes df_truck is a pandas DataFrame — TODO confirm).
df_truck.head()

## ChangepointDetection3.py
# Target column and the three feature columns.
y = df_truck['l_100km']
X = df_truck[['area', 'headwind', 'weight_kg']]

# Positional split: the first 265 rows are used for training and the
# remaining rows for testing.
X_train, X_test = X[:265], X[265:]
y_train, y_test = y[:265], y[265:]

## ChangepointDetection4.py
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion, make_union
from sklearn.ensemble import RandomForestRegressor

def get_categorical_glossary(x):
    """Map each distinct non-null value of ``x`` to an integer code.

    Codes are assigned in the sorted order returned by ``np.unique``,
    starting at 0. Null entries receive no code.

    Parameters
    ----------
    x : pandas.Series
        Column of categorical values; must support ``notnull()`` and
        boolean-mask indexing.

    Returns
    -------
    dict
        ``{value: code}`` with codes ``0 .. n_unique - 1``; empty when
        ``x`` has no non-null values.
    """
    # Drop nulls *before* taking the unique values. The previous version
    # counted non-null values but zipped over ``np.unique(x)`` of the raw
    # column, which raises TypeError when strings are mixed with None/NaN
    # and otherwise excluded nulls only because zip() truncated the pairs.
    categories = np.unique(x[x.notnull()])
    return {category: code for code, category in enumerate(categories)}

class ColumnSelector(BaseEstimator, TransformerMixin):

## ChangepointDetection5.py
# Numeric variables: a one-step pipeline that applies ColumnSelector to
# 'weight_kg'. (ColumnSelector's body is not visible here; presumably it
# returns the named columns — confirm.)
numeric_pipeline = make_pipeline(ColumnSelector(['weight_kg']))

# Categorical variables: ColumnSelector on 'area' and 'headwind', followed by
# NumericEncoder (defined elsewhere; presumably it turns categories into
# numeric codes — confirm).
categorical_pipeline = make_pipeline(ColumnSelector(['area', 'headwind']), NumericEncoder())

# Combining numerical and categorical data preprocessing: make_union wraps
# both pipelines in a single FeatureUnion.
preproc_union = make_union(numeric_pipeline, categorical_pipeline)

# Final pipeline

## ChangepointDetection6.py
# Predict on the test rows. estimation_pipeline is not defined in the
# visible snippets; it is assumed to be fitted already — TODO confirm.
y_test_predictions = estimation_pipeline.predict(X_test)
# Residuals: actual value minus predicted value.
prediction_errors = y_test - y_test_predictions

## ChangepointDetection7.py
# Load packages
from rpy2.robjects import FloatVector
from rpy2.robjects.packages import importr

# Function to convert an R object to a Python dictionary
def robj_to_dict(robj):
    """Turn a named R object into a plain dict of Python lists.

    Each entry of ``robj.names`` becomes a key; the element of ``robj`` at
    the same position is converted to a ``list`` and stored as its value.
    """
    return {name: list(values) for name, values in zip(robj.names, robj)}

## ChangepointDetection8.py
# Load the R package 'cpm' through rpy2 so its functions can be called as
# attributes of `cpm`. The package must be installed in the R environment.
cpm = importr('cpm')

## ChangepointDetection9.py
# Run cpm's detectChangePoint on the prediction errors, passed to R as a
# FloatVector. Per the cpm package documentation, cpmType='Student' selects
# the Student-t statistic and ARL0 is the target average run length between
# false alarms — verify against the installed cpm version.
cpm_result = cpm.detectChangePoint(FloatVector(prediction_errors), cpmType='Student', ARL0=1000)
# Convert the returned R object into a Python dict keyed by its R names.
cpm_result = robj_to_dict(cpm_result)

## Blogpost-Heineken1.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Blogpost-Heineken2.py
# Load the training data, parsing the 'Date' column as dates, then lowercase
# all column names.
train = pd.read_csv('train.csv', parse_dates=['Date'])
train.columns = train.columns.str.lower()
# Same treatment for the features file.
features = pd.read_csv('features.csv', parse_dates=['Date'])
features.columns = features.columns.str.lower()
# Replace missing values in the five markdown columns with 0.
features[['markdown1', 'markdown2', 'markdown3','markdown4', 'markdown5']] = features[['markdown1', 'markdown2', 'markdown3','markdown4', 'markdown5']].fillna(0)
# Load the stores file and lowercase its column names.
stores = pd.read_csv('stores.csv')
stores.columns = stores.columns.str.lower()
df_base = (
    train
    .merge(features, how='inner')
	np.random.seed(25)
	df_truck = generate_data(365, 300)
	df_truck.head()
	y = df_truck['l_100km']
	X = df_truck[['area', 'headwind', 'weight_kg']]

	X_train, y_train = map(lambda x: x[:265], [X, y])
	X_test, y_test = map(lambda x: x[265:], [X, y])
	from sklearn.base import BaseEstimator, TransformerMixin
	from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion, make_union
	from sklearn.ensemble import RandomForestRegressor

	def get_categorical_glossary(x):
		"""Map each distinct non-null value of ``x`` to an integer code.

		Codes are assigned in the sorted order returned by ``np.unique``,
		starting at 0. Null entries receive no code.

		Parameters
		----------
		x : pandas.Series
			Column of categorical values; must support ``notnull()`` and
			boolean-mask indexing.

		Returns
		-------
		dict
			``{value: code}`` with codes ``0 .. n_unique - 1``; empty when
			``x`` has no non-null values.
		"""
		# The body is now indented under the ``def``; it used to sit at the
		# same level as the header, which is an IndentationError.
		# Nulls are dropped *before* taking the unique values: zipping over
		# ``np.unique(x)`` of the raw column raises TypeError when strings
		# are mixed with None/NaN.
		categories = np.unique(x[x.notnull()])
		return {category: code for code, category in enumerate(categories)}

	class ColumnSelector(BaseEstimator, TransformerMixin):
	# Numeric variables
	numeric_pipeline = make_pipeline(ColumnSelector(['weight_kg']))

	# Categorical variables
	categorical_pipeline = make_pipeline(ColumnSelector(['area', 'headwind']), NumericEncoder())

	# Combining numerical and categorical data preprocessing
	preproc_union = make_union(numeric_pipeline, categorical_pipeline)

	# Final pipeline
	y_test_predictions = estimation_pipeline.predict(X_test)
	prediction_errors = y_test - y_test_predictions
	# Load packages
	from rpy2.robjects import FloatVector
	from rpy2.robjects.packages import importr

	# Function to convert an R object to a Python dictionary
	def robj_to_dict(robj):
		"""Turn a named R object into a plain dict of Python lists.

		Each entry of ``robj.names`` becomes a key; the element of ``robj``
		at the same position is converted to a ``list`` and stored as its
		value.
		"""
		# The return statement is now indented under the ``def``; it used to
		# sit at the same level as the header, which is an IndentationError.
		return {name: list(values) for name, values in zip(robj.names, robj)}
	cpm_result = cpm.detectChangePoint(FloatVector(prediction_errors), cpmType='Student', ARL0=1000)
	cpm_result = robj_to_dict(cpm_result)
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	%matplotlib inline
	train = pd.read_csv('train.csv', parse_dates=['Date'])
	train.columns = train.columns.str.lower()
	features = pd.read_csv('features.csv', parse_dates=['Date'])
	features.columns = features.columns.str.lower()
	features[['markdown1', 'markdown2', 'markdown3','markdown4', 'markdown5']] = features[['markdown1', 'markdown2', 'markdown3','markdown4', 'markdown5']].fillna(0)
	stores = pd.read_csv('stores.csv')
	stores.columns = stores.columns.str.lower()
	df_base = (
	train
	.merge(features, how='inner')