Skip to content

Instantly share code, notes, and snippets.

@NMZivkovic
Created March 24, 2018 19:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NMZivkovic/f4c4baa4067775de165fc17f4d589132 to your computer and use it in GitHub Desktop.
Save NMZivkovic/f4c4baa4067775de165fc17f4d589132 to your computer and use it in GitHub Desktop.
import numpy as np
import collections
class DataHandler:
    """Load whitespace-tokenized text and build word <-> id vocabularies."""

    def read_data(self, fname, encoding=None):
        """Read a text file and return all of its words as a flat array.

        Args:
            fname: Path of the text file to read.
            encoding: Text encoding forwarded to ``open``. ``None`` (the
                default) keeps the platform's default encoding.

        Returns:
            A 1-D ``numpy.ndarray`` of strings containing every
            whitespace-separated token of the file, in file order.
        """
        with open(fname, encoding=encoding) as f:
            # Flatten while reading. Building an array from per-line token
            # lists breaks as soon as two lines have different word counts
            # (ragged nested lists cannot form a rectangular array).
            # ``split()`` with no argument already discards surrounding
            # whitespace, so no separate ``strip()`` is needed.
            words = [word for line in f for word in line.split()]
        # An explicit string dtype keeps an empty file from producing a
        # float64 array.
        return np.array(words, dtype=str)

    def build_datasets(self, words):
        """Build word -> id and id -> word lookup tables.

        Ids are assigned in order of descending frequency, so the most
        common word gets id 0. Words with equal counts keep the order in
        which they were first seen.

        Args:
            words: Iterable of hashable tokens (e.g. the array returned by
                ``read_data``).

        Returns:
            A ``(dictionary, reverse_dictionary)`` tuple where
            ``dictionary`` maps each word to its integer id and
            ``reverse_dictionary`` maps each id back to its word.
        """
        ranked = collections.Counter(words).most_common()
        dictionary = {word: index for index, (word, _) in enumerate(ranked)}
        reverse_dictionary = {index: word for word, index in dictionary.items()}
        return dictionary, reverse_dictionary
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment