Skip to content

Instantly share code, notes, and snippets.

@khuangaf
Last active March 31, 2019 19:19
Show Gist options
  • Save khuangaf/d01c6f4992705c44151dca2091e13a1b to your computer and use it in GitHub Desktop.
Save khuangaf/d01c6f4992705c44151dca2091e13a1b to your computer and use it in GitHub Desktop.
This class is intended for faster and simpler categorical/label encoding on large data, where sklearn's LabelEncoder might be too slow.
'''
Author: Kung-hsiang, Huang (Steeve)
Date: 2019/Mar/15
'''
class CategoricalEncoder():
    '''
    Dictionary-based label encoder for 1-d sequences of hashable values.

    Each distinct value seen by ``fit`` is assigned an integer code in
    ``range(n_elements)``. Codes are handed out in order of first appearance
    in the fitted data, so the mapping is reproducible for the same input.

    Attributes set by this class:
        f_dict: forward mapping, original value -> integer code
        r_dict: reverse mapping, integer code -> original value
        n_elements: number of distinct values seen by the last ``fit``

    This encoder is only suitable for 1-d array/ list. You may modify it to
    become n-d compatible.
    '''

    def __init__(self):
        self.f_dict = {}
        self.r_dict = {}
        # Defined up front so the attribute exists even before fit() is called.
        self.n_elements = 0

    def fit(self, array):
        '''
        Learn the value <-> code mappings from ``array``.

        Any mapping learned by a previous call is discarded, so the forward
        and reverse dictionaries always describe the same set of values.

        :param array: 1-d list or np array of hashable values
        :return: None
        '''
        # dict.fromkeys de-duplicates while keeping first-seen order, which
        # makes the assigned codes deterministic (iterating a set is not).
        unique_elements = dict.fromkeys(array)
        # Rebuild both dicts from scratch: keeping old entries while the
        # counter restarts at 0 would leave stale keys pointing at reused codes.
        self.f_dict = {e: code for code, e in enumerate(unique_elements)}
        self.r_dict = {code: e for e, code in self.f_dict.items()}
        self.n_elements = len(self.f_dict)

    def reverse_transform(self, transformed_array, to_np=False):
        '''
        Map integer codes back to the original values.

        :param transformed_array: 1-d list or np array of codes produced by
            ``transform``
        :param to_np: if True, return an np array instead of a list
        :return: list (or np array when ``to_np`` is True) of original values,
            one per input code
        :raises KeyError: if a code was not assigned by the last ``fit``
        '''
        array = [self.r_dict[e] for e in transformed_array]
        if to_np:
            # Imported here so the class works without a module-level numpy
            # import and numpy is only needed when actually requested.
            import numpy as np
            array = np.array(array)
        return array

    def transform(self, array, to_np=False):
        '''
        Map values to their integer codes.

        :param array: 1-d list or np array of values seen during ``fit``
        :param to_np: if True, return an np array instead of a list
        :return: list (or np array when ``to_np`` is True) of integer codes,
            one per input value
        :raises KeyError: if a value was not present in the fitted data
        '''
        transformed_array = [self.f_dict[e] for e in array]
        if to_np:
            # Imported here so the class works without a module-level numpy
            # import and numpy is only needed when actually requested.
            import numpy as np
            transformed_array = np.array(transformed_array)
        return transformed_array

    def fit_transform(self, array, to_np=False):
        '''
        Fit the encoder on ``array`` and return its encoded form.

        :param array: 1-d list or np array of hashable values
        :param to_np: if True, return an np array instead of a list
        :return: list (or np array when ``to_np`` is True) of integer codes,
            one per input value
        '''
        self.fit(array)
        return self.transform(array, to_np)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment