Skip to content

Instantly share code, notes, and snippets.

@janzeteachesit
Forked from rinchik/TextAnalyzer.py
Created January 23, 2017 06:01
Show Gist options
  • Save janzeteachesit/427aba92cfd6270213a26776f74a8d2a to your computer and use it in GitHub Desktop.
Save janzeteachesit/427aba92cfd6270213a26776f74a8d2a to your computer and use it in GitHub Desktop.
Python class that analyzes a text file and compiles a hash map from each word to the words that follow it, sorted by frequency.
from collections import Counter
import pprint
class TextAnalyzer:
    """Map each word of a text file to the words that most often follow it.

    Instantiating the class reads ``text_file``, analyzes its contents and
    pretty-prints the resulting ``word_map``.

    Attributes:
        text_file: Path of the file to analyze (default ``'example.txt'``).
        top_n: Maximum number of followers kept per word (default 11).
        raw_data: File contents with newlines replaced by spaces, or ``None``
            until ``prepare_data`` has run.
        word_map: ``{word: [(follower, count), ...]}`` with followers ordered
            from most to least frequent, or ``None`` until ``analyze`` has
            run.
    """

    text_file = 'example.txt'
    top_n = 11
    raw_data = None
    word_map = None

    def __init__(self, text_file=None, top_n=None):
        """Load the file, build the word map and pretty-print it.

        Args:
            text_file: Optional path overriding the class-level ``text_file``.
            top_n: Optional limit overriding the class-level ``top_n``.

        Raises:
            IOError/OSError: If the file cannot be opened or read.
        """
        if text_file is not None:
            self.text_file = text_file
        if top_n is not None:
            self.top_n = top_n

        self.prepare_data()
        self.analyze()
        pprint.pprint(self.word_map)

    def prepare_data(self):
        """Read ``text_file`` into ``raw_data``, turning newlines into spaces."""
        # The ``with`` statement closes the file; no explicit close() needed.
        with open(self.text_file, 'r') as example:
            self.raw_data = example.read().replace('\n', ' ')

    def analyze(self):
        """Populate ``word_map`` from ``raw_data``.

        Words are whitespace-separated tokens. Every word that is followed by
        at least one other word gets an entry listing up to ``top_n``
        ``(follower, count)`` tuples, most frequent first; ties keep the order
        in which the followers were first seen. A word that only occurs as
        the very last token has no followers and therefore no entry.
        """
        words = self.raw_data.split()

        # Single pass over adjacent pairs instead of rescanning every pair
        # for every distinct word.
        followers = {}
        for current, following in zip(words, words[1:]):
            followers.setdefault(current, Counter())[following] += 1

        # Only words that actually have followers are present here, so a
        # trailing one-off word can no longer trigger a KeyError.
        self.word_map = {
            word: counts.most_common(self.top_n)
            for word, counts in followers.items()
        }
# Script entry point: constructing TextAnalyzer reads the input file, builds
# the word map and pretty-prints it (all of that happens inside __init__).
TextAnalyzer()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment