Skip to content

Instantly share code, notes, and snippets.

@jmduke
Created January 19, 2014 08:29
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jmduke/8501972 to your computer and use it in GitHub Desktop.
Save jmduke/8501972 to your computer and use it in GitHub Desktop.
import nltk
import vincent
from curses.ascii import isdigit
from nltk.corpus import cmudict
import nltk.data
import sys
# Authors to analyse; the main loop reads each one's text from "<author>.txt".
authors = [
    'kerouac',
    'joyce',
    'hemingway',
    'milton',
    'dickens',
    'fitzgerald',
    'nabokov',
    'vonnegut',
]

# Pronouncing dictionary from NLTK's cmudict corpus, loaded once at import
# time; nsyl() looks words up in it to count syllables.
d = cmudict.dict()
def nsyl(word, pronunciations=None):
    """Return the number of syllables in *word*, or 0 if it is unknown.

    The word is lower-cased, stripped of surrounding ``.,!"?;`` characters
    and of leading hyphens, then looked up in a pronouncing dictionary.
    Each dictionary entry is a list of pronunciations, and each
    pronunciation is a list of phoneme strings. Phonemes whose last
    character is a digit (the stress marker on vowel sounds in CMUdict) are
    counted as syllables.

    Args:
        word: Raw token, possibly carrying surrounding punctuation.
        pronunciations: Optional mapping of lower-case word to a list of
            pronunciations. Defaults to the module-level ``d``.

    Returns:
        The largest syllable count among the word's pronunciations, or 0
        when the cleaned word is not in the dictionary.
    """
    if pronunciations is None:
        pronunciations = d

    lowercase = word.lower().strip('.,!"?;').lstrip('-')
    if lowercase not in pronunciations:
        return 0

    # Count with a generator instead of len(filter(...)): on Python 3
    # filter() returns an iterator, which has no len(). The [-1:] slice
    # (rather than [-1]) keeps an empty phoneme string from raising.
    return max(
        sum(1 for phoneme in pronunciation if phoneme[-1:].isdigit())
        for pronunciation in pronunciations[lowercase]
    )
def _flesch_reading_ease(num_words, num_sentences, num_syllables):
    """Return the Flesch reading-ease score computed from the given totals."""
    # float() forces true division on Python 2, where int / int truncates.
    words_per_sentence = float(num_words) / num_sentences
    return 206.835 - 1.015 * words_per_sentence - 84.6 * num_syllables / num_words


# For every author: read "<author>.txt", print one CSV row of statistics
# (author, words, sentences, words per sentence, reading ease, syllables per
# word) and write a vincent line chart of syllables per sentence.
for author in authors:
    filename = author + ".txt"
    # Context manager so the file handle is closed as soon as it is read.
    with open(filename) as source:
        text = source.read()

    # Tokens and fragments of length <= 1 are dropped, as before.
    words = [token for token in text.split() if len(token) > 1]
    sentences = [chunk for chunk in text.split(".") if len(chunk) > 1]
    if not words or not sentences:
        # Nothing to average over; skip rather than divide by zero.
        sys.stderr.write("Skipping " + filename + ": no words or sentences found\n")
        continue

    # Count syllables once and reuse the total for both derived metrics.
    total_syllables = sum(nsyl(token) for token in words)
    words_per_sentence = float(len(words)) / len(sentences)
    syllables_per_word = float(total_syllables) / len(words)
    reading_level = _flesch_reading_ease(len(words), len(sentences), total_syllables)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(",".join(str(value) for value in (
        author,
        len(words),
        len(sentences),
        words_per_sentence,
        reading_level,
        syllables_per_word,
    )))

    # Only the first 100 sentences are plotted, so only those are measured.
    syllables_per_sentence = [
        sum(nsyl(token) for token in sentence.split())
        for sentence in sentences[:100]
    ]
    line = vincent.Line(syllables_per_sentence)
    line.height = 150
    line.scales['color'] = vincent.Scale(name='color', type='ordinal',
                                         domain=vincent.DataRef(data='table', field='data.col'),
                                         range=["#ff7f0e"])
    line.width = 400
    line.to_json(author + '.json', html_out=True, html_path=author + 'template.html')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment