Skip to content

Instantly share code, notes, and snippets.

@abhisheksoni27
Last active July 6, 2018 15:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abhisheksoni27/b4ffb84fd8fb43e9daf20a269fcbed15 to your computer and use it in GitHub Desktop.
Save abhisheksoni27/b4ffb84fd8fb43e9daf20a269fcbed15 to your computer and use it in GitHub Desktop.
import sys
from collections import Counter
import re
import matplotlib.pyplot as plt
import numpy as np
from utilities import cleanText
def analyze(name):
    """Plot the ten most frequent words containing ".af" found in ``<name>.txt``.

    The file is loaded through ``cleanText`` (imported from ``utilities``;
    presumably it returns an iterable of cleaned line strings -- confirm
    against that module). The lines are joined and split on single spaces
    into words, only the words containing the substring ".af" are kept, and
    the ten most common of them are shown as a bar chart.

    Args:
        name: Path of the text file to analyze, without the ".txt" suffix.

    Returns:
        None. The chart is displayed with ``plt.show()``; if no word
        contains ".af", a message is printed and nothing is plotted.
    """
    lines_text = cleanText(name + ".txt")

    # Flatten the lines into individual words (e.g. ["how", "are", "you"]).
    words = " ".join(lines_text).split(' ')
    target_words = [word for word in words if '.af' in word]

    if not target_words:
        # With no matches, zip(*[]) yields nothing and the two-name unpacking
        # below would fail with an opaque ValueError, so stop early instead.
        print("No words containing '.af' found in " + name + ".txt")
        return

    # most_common(10) returns (word, count) pairs; zip(*...) transposes them
    # into one tuple of words and one tuple of counts.
    labels, frequency = zip(*Counter(target_words).most_common(10))

    indexes = np.arange(len(labels))
    width = 0.5
    # The bar heights are the counts unpacked above.
    plt.bar(indexes, frequency, width)
    # Label each bar with its word. The former offset
    # (indexes - 0.25 + width * 0.5) always evaluated to the bar positions
    # themselves, so they are used directly.
    plt.xticks(indexes, labels)
    plt.show()
# Script entry: the first command-line argument is the text file's name
# without the ".txt" suffix. Exit with a usage hint instead of an IndexError
# traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: python " + sys.argv[0] + " <name>  (reads <name>.txt)")
analyze(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment