Skip to content

Instantly share code, notes, and snippets.

@pemagrg1
Created January 9, 2019 04:31
Show Gist options
  • Save pemagrg1/4a9141f79e91bfc0307cc13ad03e8152 to your computer and use it in GitHub Desktop.
Save pemagrg1/4a9141f79e91bfc0307cc13ad03e8152 to your computer and use it in GitHub Desktop.
one hot encoding using numpy
import numpy as np
# Tokenise the sample sentence: lower-case it and split on whitespace.
docs = "Can I eat the Pizza".lower().split()

# Vocabulary: the distinct tokens in sorted order, so that a word's position
# in this list is its integer code.
doc1 = sorted(set(docs))
print("\nvalues: ", doc1)

# Build the word -> position table once instead of constructing a NumPy array
# and scanning it for every token.
word_to_index = {word: index for index, word in enumerate(doc1)}

# Plain Python ints (rather than NumPy integer scalars) keep the printed list
# in the form "[0, 2, 1, 4, 3]" no matter how NumPy reprs its scalar types.
integer_encoded = [word_to_index[word] for word in docs]
print("\ninteger encoded: ", integer_encoded)
def get_vec(len_doc, word, vocab=None):
    """Return the one-hot encoding of ``word`` as a list of ints.

    Args:
        len_doc: Length of the returned vector (normally the vocabulary size).
        word: The token to encode.
        vocab: Sequence of vocabulary words; the position of ``word`` in it
            selects which slot is set to 1. When omitted, the module-level
            ``doc1`` vocabulary is used, which is what existing two-argument
            callers rely on.

    Returns:
        A list of ``len_doc`` zeros with a single 1 at the position of the
        first occurrence of ``word`` in the vocabulary.

    Raises:
        IndexError: If ``word`` is not in the vocabulary, or if its position
            does not fit inside a vector of length ``len_doc``.
    """
    if vocab is None:
        # Backward-compatible default: the vocabulary built by the script.
        vocab = doc1
    matches = np.where(np.array(vocab) == word)[0]
    if matches.size == 0:
        # Same exception type the bare ``[0][0]`` lookup produced, but with a
        # message that names the offending word.
        raise IndexError("%r is not in the vocabulary" % (word,))
    one_hot = [0] * len_doc
    one_hot[matches[0]] = 1
    return one_hot
def get_matrix(doc1, tokens=None):
    """Build a one-hot matrix with one row per token.

    Args:
        doc1: Sequence of vocabulary words. A word's position in this
            sequence is the column that is set to 1 for that word; if a word
            is repeated, its first position is used.
        tokens: Iterable of words to encode, one matrix row each. When
            omitted, the module-level ``docs`` token list is used, which is
            what existing one-argument callers rely on.

    Returns:
        An integer NumPy array of shape ``(len(tokens), len(doc1))`` in which
        row ``r`` is the one-hot encoding of the ``r``-th token.

    Raises:
        IndexError: If a token does not occur in ``doc1``.
    """
    if tokens is None:
        # Backward-compatible default: the token list built by the script.
        tokens = docs
    tokens = list(tokens)

    # Resolve columns against the vocabulary that was actually passed in, so
    # the row width and the lookup can never disagree.
    column_of = {}
    for column, word in enumerate(doc1):
        column_of.setdefault(word, column)

    matrix = np.zeros((len(tokens), len(doc1)), dtype=int)
    for row, word in enumerate(tokens):
        if word not in column_of:
            raise IndexError("%r is not in the vocabulary" % (word,))
        matrix[row, column_of[word]] = 1
    return matrix
# Show the one-hot matrix for the sample sentence, one row per token.
print("\nMATRIX:")
one_hot_matrix = get_matrix(doc1)
print(one_hot_matrix)
"""
OUTPUT:
values: ['can', 'eat', 'i', 'pizza', 'the']
integer encoded: [0, 2, 1, 4, 3]
MATRIX:
[[1 0 0 0 0]
 [0 0 1 0 0]
 [0 1 0 0 0]
 [0 0 0 0 1]
 [0 0 0 1 0]]
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment