Skip to content

Instantly share code, notes, and snippets.

@paulochf
Created January 16, 2018 16:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save paulochf/ae23ae4388cdc51e1460416d0e961104 to your computer and use it in GitHub Desktop.
Save paulochf/ae23ae4388cdc51e1460416d0e961104 to your computer and use it in GitHub Desktop.
A label encoding transformer
from sklearn.base import BaseEstimator, TransformerMixin
class NoFitMixin:
    """Mixin providing a no-op ``fit`` that returns the estimator itself."""

    def fit(self, X, y=None, **fit_params):
        """Accept and ignore all arguments, then return ``self``."""
        # There is nothing to learn; returning the instance keeps calls
        # such as ``est.fit(X).transform(X)`` chainable.
        return self
class LabelFeaturizer(BaseEstimator, TransformerMixin, NoFitMixin):
    """
    Transformer which label-encodes the given columns and adds,
    for each of them, a column with the count of the respective category.

    Every configured column is replaced by its integer category codes
    (stored with pandas ``category`` dtype), and a sibling column named
    ``<column>_label_count`` holds the number of rows in the transformed
    frame that share the row's code.

    Counts are recomputed from whatever frame is passed to ``transform``;
    ``count_dict`` keeps the ``{code: count}`` mapping of the most recent
    call, keyed by column name.
    """

    def __init__(self, columns):
        # Column labels to encode; they are concatenated with
        # "_label_count" below, so they are expected to be strings.
        self.columns = columns
        self.count_dict = {}

    def transform(self, df):
        """Return an encoded copy of ``df``; the input frame is not modified."""
        df_ = df.copy()

        for column in self.columns:
            # Integer code per row (pandas uses -1 for missing values).
            codes = df[column].astype("category").cat.codes
            counts = codes.value_counts().to_dict()
            self.count_dict[column] = counts

            df_[column] = codes.astype("category")
            # Look the count up by *code*, the key space of ``counts``.
            # Looking it up by the raw value (as was done previously) misses
            # for every non-integer column and yields the fallback 1.
            df_[column + "_label_count"] = codes.map(counts)

        return df_
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment