Skip to content

Instantly share code, notes, and snippets.

@literadix
Created April 3, 2022 08:23
Show Gist options
  • Save literadix/57c6687d808fbc0e92853d4e37f2d0cc to your computer and use it in GitHub Desktop.
Save literadix/57c6687d808fbc0e92853d4e37f2d0cc to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
# https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html#sklearn.preprocessing.OneHotEncoder
# https://datascienceparichay.com/article/numpy-hstack/
if __name__ == '__main__':
    # Demo: one-hot encode the categorical columns of a small table and glue
    # them together with the numeric and label columns into one NumPy array.
    rows = [
        ['Male', 1, 'Green', 12, 5],
        ['Female', 2, 'Yellow', 13, 6],
        ['Female', 3, 'Red', 14, 8],
        ['Male', 4, 'Red', 16, 9],
    ]
    df = pd.DataFrame(rows, columns=['Sex', 'Id', 'Color', 'Age', 'Class'])
    print(df)

    bin_features = ['Sex', 'Color']  # categorical columns to one-hot encode
    num_features = ['Id', 'Age']     # numeric columns copied through unchanged
    col_labels = ['Class']           # label column, appended last

    # https://www.statology.org/numpy-get-column/
    # scikit-learn 1.2 renamed the dense-output switch from `sparse` to
    # `sparse_output`, and 1.4 removed the old keyword. Try the current name
    # first and fall back so the script also runs on older installations.
    try:
        enc = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        enc = OneHotEncoder(handle_unknown='ignore', sparse=False)

    # Fit and encode in one pass; the same data is used for both steps.
    bin_values = df[bin_features].to_numpy()
    X_bin = enc.fit_transform(bin_values)
    print(f"Binary categories: {enc.categories_}")

    X_num = df[num_features].to_numpy()
    X_lab = df[col_labels].to_numpy()
    print(f"Num columns: {X_num}")
    print(f"Bin columns: {X_bin}")
    print(f"Label columns: {X_lab}")

    # Column order of the result: numeric features, one-hot block, label.
    ar_h = np.hstack((X_num, X_bin, X_lab))
    print(f"Converted: {ar_h}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment