Skip to content

Instantly share code, notes, and snippets.

@ayushoriginal
Created June 24, 2019 06:14
Show Gist options
  • Save ayushoriginal/970d64382efabd0a6bf83d70f8803604 to your computer and use it in GitHub Desktop.
Save ayushoriginal/970d64382efabd0a6bf83d70f8803604 to your computer and use it in GitHub Desktop.
encode_corpus
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
def encode_corpus(self, data):
    """Hash-encode tokenized documents and pad them to a common length.

    Each document in ``data`` is joined with single spaces and passed to
    Keras' ``one_hot`` with ``self.vocab_length`` as the hashing space.
    As a side effect, one ``"<token> <code>"`` line per token is written to
    the file named ``"vocab_mapping.txt" + version`` (opened in ``'w'`` mode,
    so any existing file is overwritten).

    Args:
        data: Iterable of documents, each a sequence of string tokens.
            It is iterated twice, so it must not be a one-shot generator.

    Returns:
        The result of ``pad_sequences`` over the encoded documents, with
        ``maxlen=self.max_len`` and ``padding='post'``.

    NOTE(review): ``version`` is not defined in this block; presumably a
    module-level string suffix -- confirm against the rest of the file.
    """
    encoded_docs = [one_hot(' '.join(d), self.vocab_length) for d in data]
    vocab_file = "vocab_mapping.txt"+version
    with open(vocab_file,'w') as f:
        for tokens, codes in zip(data, encoded_docs):
            # one_hot re-tokenizes the joined text, so it may yield fewer
            # codes than there are tokens (e.g. a punctuation-only token is
            # filtered out). zip stops at the shorter sequence instead of
            # raising IndexError halfway through writing the file.
            f.writelines(
                token+" "+str(code)+"\n" for token, code in zip(tokens, codes)
            )
    return pad_sequences(encoded_docs, maxlen=self.max_len, padding='post')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment