class CountEncoder:
    """Encode categorical columns by how often each value occurred during ``fit``.

    Each selected column is replaced by integer counts learned from the
    fitted frame:

    * a value seen ``k`` times during ``fit`` is encoded as ``k``;
    * missing values (NaN/None) are encoded as ``-1``;
    * values that never occurred during ``fit`` are encoded as ``0``.

    Both ``object`` and ``category`` dtype columns are supported.

    Args:
        cols: Column names to encode. When ``None``, the object/category
            columns of the frame passed to ``fit`` are used and stored back
            on ``self.cols``.
        miss_val: Key under which missing values are recorded in the
            per-column count maps.

    Attributes set by ``fit``:
        count_maps_: ``{column: {value: count}}``; every map also holds
            ``miss_val`` mapped to ``-1``.
    """

    def __init__(self, cols=None, miss_val="missing"):
        self.cols = cols
        self.miss_val = miss_val
        self.count_maps_ = {}

    def _fill_missing(self, series):
        """Return an object-dtype copy of *series* with NaN/None set to ``miss_val``."""
        # Converting to object first makes this work for plain object columns
        # (which have no ``.cat`` accessor) and avoids the error raised when
        # adding a category that already exists.
        values = series.astype(object)
        values[series.isna().to_numpy()] = self.miss_val
        return values

    def fit(self, X):
        """Learn per-column value counts from *X*.

        Args:
            X: DataFrame containing every column listed in ``self.cols``.

        Returns:
            The fitted encoder itself, so calls can be chained.

        Raises:
            KeyError: If a column in ``self.cols`` is not present in *X*.
        """
        if self.cols is None:
            self.cols = X.select_dtypes(include=["object", "category"]).columns.tolist()

        # Start from a clean slate so a refit never keeps stale columns.
        self.count_maps_ = {}
        for col in self.cols:
            series = X[col]
            counts = self._fill_missing(series).value_counts().to_dict()
            # Declared-but-unused categories of a categorical column are
            # recorded with a count of zero.
            for category in getattr(series.dtype, "categories", ()):
                counts.setdefault(category, 0)
            # Missing values always encode to -1, not to their frequency.
            counts[self.miss_val] = -1
            self.count_maps_[col] = counts
        return self

    def transform(self, X):
        """Return a copy of *X* with the encoded columns replaced by counts.

        Args:
            X: DataFrame containing every column listed in ``self.cols``.

        Returns:
            A new DataFrame; *X* itself is left untouched. Encoded columns
            hold integers.

        Raises:
            KeyError: If a column in ``self.cols`` is not present in *X*.
        """
        X = X.copy()
        for col in self.cols:
            count_map = self.count_maps_.get(col, {})
            encoded = self._fill_missing(X[col]).map(count_map)
            # Values absent from the fitted map come back as NaN; they were
            # observed zero times, so encode them as 0 before the int cast.
            X[col] = encoded.fillna(0).astype(int)
        return X