Created
July 7, 2020 06:52
-
-
Save CMCDragonkai/6ed11a9b0c1d77d09f8f227489843eaa to your computer and use it in GitHub Desktop.
LabelEncoder and OneHotEncoder #python #sklearn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Lookup table of the known class ids.
# The frame is indexed by class id, and the "class_id" column is kept as a
# regular column too (drop=False), so a row can be fetched by id with .loc
# or by position with .iloc.
classes_df = pd.DataFrame({
    "class_id": [
        'n01669191', 'n01812337', 'n02007558', 'n02871439', 'n04306847',
        'n10226413', 'n10267311', 'n12360108', 'n12662772', 'n13918274',
    ],
})
classes_df = classes_df.set_index("class_id", drop=False)
# Integer position of every class, 0 .. len(classes_df) - 1, in row order.
classes_df["class_index"] = np.arange(len(classes_df.index))
# label encoder encodes class ids to class indexes
le = LabelEncoder()
# one hot encoder encodes class ids to one hot vectors
# Dense output is requested instead of a scipy sparse matrix. The keyword
# for that was renamed from ``sparse`` to ``sparse_output`` in
# scikit-learn 1.2 and the old spelling was later removed, so try the new
# name first and fall back to the old one on older releases.
try:
    ohe = OneHotEncoder(categories='auto', sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(categories='auto', sparse=False)
le.fit(classes_df['class_id'])
# OneHotEncoder works on 2-D input, so the ids are reshaped into a single
# column: one row per class id, one feature.
ohe.fit(classes_df['class_id'].to_numpy().reshape(-1, 1))
# Usage examples. Results are bound to names so they stay available when
# this runs as a script rather than in a REPL/notebook.

# get by class id
row_by_id = classes_df.loc["n01669191"]
# get by class index
row_by_index = classes_df.iloc[0]
# this is how you get an array of indexes
indexes = le.transform([
    "n12662772",
    "n04306847",
])
# this is how you go from list of indexes to list of ids
ids_from_indexes = le.inverse_transform(indexes)
# this is how you get an array of vectors
# (each sample is its own one-element row, because the encoder was fitted
# on a single-column 2-D array)
vectors = ohe.transform([
    ["n12662772"],
    ["n04306847"],
])
# this is how you go from a list of vectors to a list of ids
# inverse_transform returns one row per sample with a single column;
# ravel() flattens that to 1-D and, unlike squeeze(), still gives a 1-D
# array when only one vector is passed.
ids_from_vectors = ohe.inverse_transform(vectors).ravel()
# using le and ohe
# you can use this for pretty much any classification machine learning system
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment