Skip to content

Instantly share code, notes, and snippets.

@pemagrg1
Created January 9, 2019 04:36
Show Gist options
  • Save pemagrg1/ea492283230bfa7d075ee08aa861e8cc to your computer and use it in GitHub Desktop.
Save pemagrg1/ea492283230bfa7d075ee08aa861e8cc to your computer and use it in GitHub Desktop.
one hot encoding using sklearn
from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the words of two short example sentences, then decode the
# first encoded row back to its word to show the mapping is reversible.

# Example documents: lower-cased and split on whitespace into word tokens.
doc1 = "Can I eat the Pizza".lower().split()
doc2 = "You can eat the Pizza".lower().split()

# Concatenate the token lists; duplicates are kept on purpose so that every
# token occurrence gets its own row in the encoded output.
doc3 = doc1 + doc2
data = list(doc3)
values = array(data)
print(values)

# Integer encode: map each distinct token to an integer id.
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)
print(integer_encoded)

# Binary (one-hot) encode, requesting a dense array rather than a sparse
# matrix. scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output`
# and 1.4 removed the old name, so try the new spelling first and fall back
# to the old one on releases that do not know it yet.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)
# The encoder expects a 2-D (n_samples, n_features) input: one column here.
integer_encoded = integer_encoded.reshape(-1, 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
print(onehot_encoded)

# Invert the first example: the position of the 1 in the first one-hot row is
# the integer id, which the label encoder maps back to the original token.
inverted = label_encoder.inverse_transform([argmax(onehot_encoded[0, :])])
print(inverted)
"""
OUTPUT:
['can' 'i' 'eat' 'the' 'pizza' 'you' 'can' 'eat' 'the' 'pizza']
[0 2 1 4 3 5 0 1 4 3]
[[1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0.]]
['can']
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment