CMCDragonkai/labelencoder_onehotencoder.py

## labelencoder_onehotencoder.py
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


# Table of the known classes, one row per class id.
# NOTE(review): class_index below is assigned positionally; it agrees with the
# LabelEncoder codes only while this list is kept in sorted order (the encoder
# appears to number classes by sorted value) -- confirm before reordering.
classes_df = pd.DataFrame({
    "class_id": ['n01669191', 'n01812337', 'n02007558', 'n02871439', 'n04306847',
                 'n10226413', 'n10267311', 'n12360108', 'n12662772', 'n13918274']
})

# index the rows by class id, but keep class_id as a regular column as well
classes_df = classes_df.set_index("class_id", drop=False)
classes_df["class_index"] = np.arange(len(classes_df.index))

# label encoder encodes class ids to class indexes
le = LabelEncoder()
# one hot encoder encodes class ids to one hot vectors (dense arrays).
# The keyword for dense output was renamed from `sparse` to `sparse_output`
# in scikit-learn 1.2 and the old name was removed in 1.4, so try the new
# spelling first and fall back for older installations.
try:
    ohe = OneHotEncoder(categories='auto', sparse_output=False)
except TypeError:
    # scikit-learn < 1.2 only accepts the old keyword
    ohe = OneHotEncoder(categories='auto', sparse=False)

le.fit(classes_df['class_id'])
# OneHotEncoder expects a 2D input: one column holding the class ids
ohe.fit(classes_df['class_id'].to_numpy().reshape(-1, 1))

# get by class id
classes_df.loc["n01669191"]

# get by class index
classes_df.iloc[0]

# this is how you get an array of indexes
indexes = le.transform([
    "n12662772",
    "n04306847"
])

# this is how you go from list of indexes to list of ids
le.inverse_transform(indexes)

# this is how you get an array of vectors
vectors = ohe.transform([
    ["n12662772"],
    ["n04306847"]
])

# this is how you go from a list of vectors to a list of ids
ohe.inverse_transform(vectors).squeeze()

# using le and ohe
# you can use this for pretty much any classification machine learning system
	import numpy as np
	import pandas as pd
	from sklearn.preprocessing import LabelEncoder, OneHotEncoder


	# Table of the known classes, one row per class id.
	# NOTE(review): class_index below is assigned positionally; it agrees with the
	# LabelEncoder codes only while this list is kept in sorted order (the encoder
	# appears to number classes by sorted value) -- confirm before reordering.
	classes_df = pd.DataFrame({
		"class_id": ['n01669191', 'n01812337', 'n02007558', 'n02871439', 'n04306847',
		             'n10226413', 'n10267311', 'n12360108', 'n12662772', 'n13918274']
	})

	# index the rows by class id, but keep class_id as a regular column as well
	classes_df = classes_df.set_index("class_id", drop=False)
	classes_df["class_index"] = np.arange(len(classes_df.index))

	# label encoder encodes class ids to class indexes
	le = LabelEncoder()
	# one hot encoder encodes class ids to one hot vectors (dense arrays).
	# The keyword for dense output was renamed from `sparse` to `sparse_output`
	# in scikit-learn 1.2 and the old name was removed in 1.4, so try the new
	# spelling first and fall back for older installations.
	try:
		ohe = OneHotEncoder(categories='auto', sparse_output=False)
	except TypeError:
		# scikit-learn < 1.2 only accepts the old keyword
		ohe = OneHotEncoder(categories='auto', sparse=False)

	le.fit(classes_df['class_id'])
	# OneHotEncoder expects a 2D input: one column holding the class ids
	ohe.fit(classes_df['class_id'].to_numpy().reshape(-1, 1))

	# get by class id
	classes_df.loc["n01669191"]

	# get by class index
	classes_df.iloc[0]

	# this is how you get an array of indexes
	indexes = le.transform([
		"n12662772",
		"n04306847"
	])

	# this is how you go from list of indexes to list of ids
	le.inverse_transform(indexes)

	# this is how you get an array of vectors
	vectors = ohe.transform([
		["n12662772"],
		["n04306847"]
	])

	# this is how you go from a list of vectors to a list of ids
	ohe.inverse_transform(vectors).squeeze()

	# using le and ohe
	# you can use this for pretty much any classification machine learning system