StevenReitsma/ChangepointDetection4.py

## ChangepointDetection4.py
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion, make_union

def get_categorical_glossary(x):
    """
    Build a value -> integer-code lookup for one column.

    Parameters
    ----------
    x : object exposing ``notnull()`` and boolean-mask indexing
        (presumably a pandas Series -- confirm against callers).

    Returns
    -------
    dict
        Maps each distinct non-null value of ``x`` to its position in the
        sorted order produced by ``np.unique`` (0, 1, 2, ...). Null values
        get no entry.
    """
    # Keys and codes are derived from the same non-null values. Sorting the
    # raw column instead fails with TypeError when an object column mixes
    # strings with None/NaN.
    levels = np.unique(x[x.notnull()])
    return {level: code for code, level in enumerate(levels)}

class ColumnSelector(BaseEstimator, TransformerMixin):
    """
    Transformer that keeps only the columns named at construction time.
    """
    def __init__(self, colnames):
        # Whatever is stored here is used verbatim as the index in transform().
        self.colnames = colnames

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` indexed by the configured column name(s); ``y`` is ignored."""
        selection = X[self.colnames]
        return selection

class NumericEncoder(BaseEstimator, TransformerMixin):
    """
    Encodes every column as integer indices.

    ``fit`` stores one value -> code lookup per column, built with
    ``get_categorical_glossary``; ``transform`` maps each column through the
    lookup stored under its name. Written because, per the original author,
    scikit-learn's LabelEncoder is not supported in Pipelines.
    """
    def fit(self, X, y=None):
        """Build the per-column lookups from ``X`` (``y`` is ignored); returns self."""
        self.glossary = {column: get_categorical_glossary(X[column]) for column in X}
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with each column replaced by its integer codes."""
        encoded = X.copy()
        for column in X:
            lookup = self.glossary[column]
            # Values absent from the lookup come out as NaN.
            encoded[column] = X[column].map(lookup)
        return encoded
	from sklearn.base import BaseEstimator, TransformerMixin
	from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion, make_union
	from sklearn.ensemble import RandomForestRegressor

	def get_categorical_glossary(x):
	    """
	    Build a value -> integer-code lookup for one column.

	    Parameters
	    ----------
	    x : object exposing ``notnull()`` and boolean-mask indexing
	        (presumably a pandas Series -- confirm against callers).

	    Returns
	    -------
	    dict
	        Maps each distinct non-null value of ``x`` to its position in the
	        sorted order produced by ``np.unique`` (0, 1, 2, ...). Null values
	        get no entry.
	    """
	    # NOTE(review): a function with this name is also defined earlier in
	    # this file; consider keeping a single copy.
	    # Keys and codes are derived from the same non-null values. Sorting the
	    # raw column instead fails with TypeError when an object column mixes
	    # strings with None/NaN.
	    levels = np.unique(x[x.notnull()])
	    return {level: code for code, level in enumerate(levels)}

	class ColumnSelector(BaseEstimator, TransformerMixin):
	    """
	    Selects columns by name.
	    """
	    # NOTE(review): a class with this name is also defined earlier in this
	    # file; consider keeping a single copy.

	    def __init__(self, colnames):
	        # Used verbatim as the index into X in transform().
	        self.colnames = colnames

	    def fit(self, X, y=None):
	        """Nothing is learned from the data; returns the transformer itself."""
	        return self

	    def transform(self, X, y=None):
	        """Return ``X`` indexed by the configured column name(s); ``y`` is ignored."""
	        return X[self.colnames]

	class NumericEncoder(BaseEstimator, TransformerMixin):
	    """
	    Replaces categoricals with integer indices.

	    ``fit`` stores one value -> code lookup per column, built with
	    ``get_categorical_glossary``; ``transform`` maps each column through the
	    lookup stored under its name. Written because, per the original author,
	    scikit-learn's LabelEncoder is not supported in Pipelines.
	    """
	    # NOTE(review): a class with this name is also defined earlier in this
	    # file; consider keeping a single copy.

	    def fit(self, X, y=None):
	        """Build the per-column lookups from ``X`` (``y`` is ignored); returns self."""
	        self.glossary = {nm: get_categorical_glossary(X[nm]) for nm in X}
	        return self

	    def transform(self, X, y=None):
	        """Return a copy of ``X`` with each column replaced by its integer codes."""
	        X_new = X.copy()
	        for nm in X:
	            # if missing in dict, sets to NaN
	            X_new[nm] = X[nm].map(self.glossary[nm])
	        return X_new