Skip to content

Instantly share code, notes, and snippets.

@niklaskorz
Last active December 17, 2015 11:49
Show Gist options
  • Save niklaskorz/5605593 to your computer and use it in GitHub Desktop.
Save niklaskorz/5605593 to your computer and use it in GitHub Desktop.
from __future__ import print_function
from collections import Counter
import re
# Pre-compiled patterns used by analyse(), which lower-cases its input before
# matching -- hence the letter class only needs to cover a-z.

# A word: a run of word characters and hyphens between word boundaries.
word_re = re.compile(r"\b[\w-]+\b")
# A single lower-case ASCII letter.
letter_re = re.compile(r"[a-z]")
# A symbol: any character that is neither a word character nor whitespace.
# Written as a raw string because "\w" and "\s" are invalid escape sequences
# in a plain string literal and trigger a warning on modern interpreters.
symbol_re = re.compile(r"[^\w\s]")

# Report templates. Each one is formatted with a single positional argument,
# a list of (item, count) pairs as returned by Counter.most_common(3), and
# prints the item (index 0) of the first three pairs.
messages = [
    "Top three most common words: '{0[0][0]}', '{0[1][0]}', '{0[2][0]}'",
    "Top three most common letters: '{0[0][0]}', '{0[1][0]}', '{0[2][0]}'",
]
def _format_top(template, counter):
    """Render *template* with the three most common items of *counter*.

    *template* indexes three (item, count) pairs, so it can only be applied
    when the counter holds at least three distinct items. With fewer, the
    label in front of the template's first colon is reused and followed by
    the items that do exist, or by "none" when the counter is empty.
    """
    top = counter.most_common(3)
    if len(top) == 3:
        return template.format(top)
    # Formatting the template here would raise IndexError on the missing
    # entries, so build the line by hand from what is available.
    label = template.partition(":")[0]
    listed = ", ".join("'{0}'".format(item) for item, _ in top)
    return "{0}: {1}".format(label, listed or "none")


def analyse(text):
    """Print word, letter and symbol statistics for *text*.

    The text is lower-cased first, so counting is case-insensitive. Five
    lines are written to standard output: the number of words, letters and
    symbols (as matched by the module-level patterns), followed by the most
    common words and the most common letters.

    Texts with fewer than three distinct words or letters (including the
    empty string) are reported with the items that exist instead of
    failing with IndexError.

    Returns None.
    """
    text = text.lower()

    words = word_re.findall(text)
    letters = letter_re.findall(text)
    symbols = symbol_re.findall(text)

    print(len(words), "words")
    print(len(letters), "letters")
    print(len(symbols), "symbols")
    print(_format_top(messages[0], Counter(words)))
    print(_format_top(messages[1], Counter(letters)))
# Command-line entry point: analyse the file named by the first argument.
if __name__ == "__main__":
    from sys import argv, exit

    if len(argv) < 2:
        # A missing argument is a usage error. Passing a string to exit()
        # writes it to stderr and terminates with status 1, so calling
        # scripts can tell the run did not succeed.
        exit("Usage: {0} <file>".format(__file__))
    with open(argv[1], "r") as f:
        analyse(f.read())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment