Applying transformations on a subset of columns
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler


class GetDummiesCatCols(sklearn.base.BaseEstimator, sklearn.base.TransformerMixin):
    """Replace `cols` with their dummies (One Hot Encoding).

    `cols` should be a list of column names holding categorical data.
    Furthermore, this class streamlines the implementation of one hot encoding
    as available on [pandas.get_dummies](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
    """

    def __init__(self, cols=None):
        self.cols = cols

    def transform(self, df, **transform_params):
        cols_dummy = pd.get_dummies(df[self.cols])
        df = df.drop(self.cols, axis=1)
        df = pd.concat([df, cols_dummy], axis=1)
        return df

    def fit(self, df, y=None, **fit_params):
        return self


class StandartizeFloatCols(sklearn.base.BaseEstimator, sklearn.base.TransformerMixin):
    """Standard-scale the columns in the data frame.

    `cols` should be a list of columns in the data.
    """

    def __init__(self, cols=None):
        self.cols = cols
        self.standard_scaler = StandardScaler()

    def transform(self, df, **transform_params):
        standartize_cols = pd.DataFrame(
            # StandardScaler returns a NumPy array, and thus indexing
            # breaks. Explicitly fixed next.
            self.standard_scaler.transform(df[self.cols]),
            columns=self.cols,
            # The index of the resulting DataFrame should be assigned and
            # equal to the one of the original DataFrame. Otherwise, upon
            # concatenation NaNs will be introduced.
            index=df.index
        )
        df = df.drop(self.cols, axis=1)
        df = pd.concat([df, standartize_cols], axis=1)
        return df

    def fit(self, df, y=None, **fit_params):
        self.standard_scaler.fit(df[self.cols])
        return self
drorata commented Jun 21, 2017

When working with pandas.DataFrame, it is likely that one would like to apply different preprocessing steps to different columns. For example, apply one hot encoding to the categorical columns (or a subset of them) and standard scale the other numerical ones.

Here's my attempt at doing this in a way that plays nicely with sklearn.model_selection.cross_val_score and sklearn.pipeline.make_pipeline, for example.

For example, here's a pipeline:

sklearn.pipeline.make_pipeline(
    StandartizeFloatCols(cols=X.dtypes[X.dtypes != 'category'].index.tolist()),
    GetDummiesCatCols(cols=X.dtypes[X.dtypes == 'category'].index.tolist()),
    DecisionTreeClassifier(random_state=42)
)
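
For completeness, here is a minimal sketch of how such a pipeline could be scored with cross-validation. It assumes a feature DataFrame X (with the categorical columns already set to the 'category' dtype) and a target y, neither of which is defined in this gist:

from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

# X (a DataFrame whose categorical columns have the 'category' dtype) and y
# (the target) are assumed to exist already; they are not part of this gist.
pipe = make_pipeline(
    StandartizeFloatCols(cols=X.dtypes[X.dtypes != 'category'].index.tolist()),
    GetDummiesCatCols(cols=X.dtypes[X.dtypes == 'category'].index.tolist()),
    DecisionTreeClassifier(random_state=42),
)

scores = cross_val_score(pipe, X, y, cv=5)
print(scores.mean(), scores.std())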

I'll be happy to discuss this and hear feedback.

drorata commented Jun 22, 2017

So, I realized that this version is wrong. The reason is that StandartizeFloatCols() fits the scaling factors during the transform. Consider the following example.

import numpy as np
import pandas as pd

custom_std_scale = StandartizeFloatCols(cols=['v1', 'v2'])

N = 1000
np.random.seed(42)
df = pd.DataFrame(
    {
        "v1": np.random.random(size=N),
        "v2": np.random.geometric(0.2, size=N)
    }
)

df_test = pd.DataFrame(
    {
        "v1": np.random.random(size=N),
        "v2": np.random.geometric(0.2, size=N)
    }
)

df_test_swap = pd.DataFrame(
    {
        "v1": np.random.geometric(0.2, size=N),
        "v2": np.random.random(size=N)
    }
)

Let us fit the class using df:

custom_std_scale.fit(df)

Now, we can use it to transform df_test. We expect (and this is the desired behavior) that the fitted class will use the scales learned in the previous step and apply them to df_test. As the distributions used are the same, the resulting columns should have zero mean and unit STD. Indeed:

custom_std_scale.transform(df_test).describe()

yields:

          v1              v2
mean     -7.460699e-17    9.148238e-17
std       1.000500e+00    1.000500e+00

However, if we use custom_std_scale on df_test_swap, we expect non-zero means and non-unit STDs, since the distributions are swapped. BUT:

custom_std_scale.transform(df_test_swap).describe()

yields:

          v1              v2
mean     -6.217249e-17   -7.016610e-17
std       1.000500e+00    1.000500e+00

The reason is that the instance re-fitted the underlying StandardScaler() instance.
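
To illustrate, here is a sketch of what such a broken transform looks like (this is not the code at the top of the gist; it only demonstrates the bug described above): calling fit_transform inside transform re-learns the scaling factors from whatever DataFrame is passed in, so every input comes out with zero mean and unit STD.

# Illustrative only: a transform that (wrongly) re-fits the scaler on its input.
def transform(self, df, **transform_params):
    standartize_cols = pd.DataFrame(
        # fit_transform re-learns mean and std from df itself, discarding
        # whatever was learned in fit() -- this is the bug.
        self.standard_scaler.fit_transform(df[self.cols]),
        columns=self.cols,
        index=df.index,
    )
    df = df.drop(self.cols, axis=1)
    return pd.concat([df, standartize_cols], axis=1)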

I will fix it and commit a new version.
