class CountEncoder:
    """Replace categorical values with how often they occurred during ``fit``.

    Encoding rules applied by ``transform``:

    * a value seen during ``fit`` becomes its training frequency;
    * a missing value (NaN/None), or a value literally equal to ``miss_val``,
      becomes ``-1``;
    * a value never seen during ``fit`` becomes ``0`` (it occurred zero times
      in the training data).

    Attributes:
        cols: Names of the columns to encode. When ``None`` at ``fit`` time it
            is filled in with every object/category column of the frame.
        miss_val: Key under which the missing-value code is stored in each
            per-column map.
        count_maps_: ``{column: {value: count}}`` learned by ``fit``.
    """

    def __init__(self, cols=None, miss_val="missing"):
        """Store the configuration; nothing is learned until ``fit``.

        Args:
            cols: Column names to encode, or ``None`` to pick every
                object/category column when ``fit`` is called.
            miss_val: Placeholder key used for missing values.
        """
        self.cols = cols
        self.miss_val = miss_val
        self.count_maps_ = {}

    def fit(self, X):
        """Learn per-column value frequencies from ``X``.

        Args:
            X: DataFrame containing every column listed in ``cols``.

        Returns:
            The encoder itself, so calls can be chained.
        """
        if self.cols is None:
            self.cols = X.select_dtypes(
                include=["object", "category"]
            ).columns.tolist()

        # Start from a fresh mapping so a refit never keeps stale columns.
        count_maps = {}
        for col in self.cols:
            # value_counts() skips NaN and works for object and categorical
            # columns alike, so the ``.cat`` accessor (which raises on
            # non-categorical data) is not needed here.
            counts = X[col].value_counts().to_dict()
            # Missing values get a fixed code instead of a frequency.
            counts[self.miss_val] = -1
            count_maps[col] = counts
        self.count_maps_ = count_maps
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the encoded columns replaced by counts.

        Args:
            X: DataFrame containing every column listed in ``cols``.

        Returns:
            A new DataFrame; the input frame is left untouched. Encoded
            columns hold integers.
        """
        X = X.copy()
        for col in self.cols:
            count_map = self.count_maps_.get(col, {})
            missing_code = count_map.get(self.miss_val, -1)

            values = X[col]
            is_missing = values.isna()
            # Map through plain objects so categorical and object columns
            # take the same path; anything absent from the map becomes NaN.
            counts = values.astype(object).map(count_map)
            # Values never seen during fit occurred zero times.
            counts = counts.where(counts.notna(), 0)
            # Genuinely missing inputs take the dedicated missing code.
            counts = counts.where(~is_missing, missing_code)
            X[col] = counts.astype(int)
        return X