jmduke/gist:8501972

## gistfile1.txt
import nltk
import vincent
from curses.ascii import isdigit
from nltk.corpus import cmudict
import nltk.data
import sys

# Authors to analyse; the main loop reads "<name>.txt" for each entry.
authors = [
    'kerouac',
    'joyce',
    'hemingway',
    'milton',
    'dickens',
    'fitzgerald',
    'nabokov',
    'vonnegut',
]

# Pronouncing dictionary loaded through NLTK; nsyl() looks words up in it.
d = cmudict.dict()

def nsyl(word, pronunciations=None):
    """Return the number of syllables in ``word``, or 0 if it is unknown.

    The word is lowercased, stripped of surrounding ``.,!"?;`` characters
    and of leading hyphens, then looked up in a pronouncing dictionary that
    maps a word to a list of pronunciations, each a sequence of phoneme
    strings.  Every phoneme whose last character is a digit counts as one
    syllable (CMUdict puts a trailing stress digit on vowel phonemes).
    When a word has several pronunciations the largest count is returned.

    Args:
        word: Token to measure; case and surrounding punctuation are ignored.
        pronunciations: Optional mapping to look the word up in.  Defaults
            to the module-level ``d``.

    Returns:
        The syllable count as an int, or 0 when the normalised word is not
        in the mapping (so unknown words contribute nothing to totals).
    """
    if pronunciations is None:
        pronunciations = d

    lowercase = word.lower().strip('.,!"?;').lstrip('-')
    variants = pronunciations.get(lowercase)
    if not variants:
        return 0

    # Counting with sum() instead of len(filter(...)) keeps this working on
    # Python 3, where filter() returns an iterator; str.isdigit removes the
    # need for curses.ascii.  The [-1:] slice tolerates an empty phoneme.
    return max(
        sum(1 for phoneme in variant if phoneme[-1:].isdigit())
        for variant in variants
    )

# For every author: read "<author>.txt", print one CSV row of readability
# statistics, and write a line chart of syllables per sentence.
for author in authors:
    filename = author + ".txt"
    # Context manager so the handle is closed even if reading fails.
    with open(filename) as source:
        text = source.read()

    # Tokens and "."-separated fragments of at most one character are
    # discarded.  NOTE(review): this also drops real one-letter words such
    # as "a" and "I", and only "." ends a sentence - confirm that is intended.
    words = [token for token in text.split() if len(token) > 1]
    sentences = [chunk for chunk in text.split(".") if len(chunk) > 1]
    if not words or not sentences:
        # Avoid a ZeroDivisionError on an empty or punctuation-free file.
        sys.stderr.write("skipping " + filename + ": no words or sentences found\n")
        continue

    # Syllables are counted once and the total reused for both figures.
    total_syllables = sum(nsyl(token) for token in words)

    # float() forces true division on Python 2, where int / int truncated
    # the average sentence length and skewed the score.
    words_per_sentence = float(len(words)) / len(sentences)
    syllables_per_word = float(total_syllables) / len(words)

    # Flesch Reading Ease score.
    reading_level = 206.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word

    # CSV row: author, words, sentences, words/sentence, score, syllables/word.
    # A single parenthesised argument prints the same on Python 2 and 3.
    print(",".join(str(field) for field in (
        author,
        len(words),
        len(sentences),
        words_per_sentence,
        reading_level,
        syllables_per_word,
    )))

    # Syllable total of each sentence; only the first 100 are charted.
    sentence_lengths = [
        sum(nsyl(token) for token in sentence.split())
        for sentence in sentences
    ]
    line = vincent.Line(sentence_lengths[:100])
    line.height = 150
    line.width = 400
    # Ordinal colour scale whose range holds a single colour.
    line.scales['color'] = vincent.Scale(
        name='color', type='ordinal',
        domain=vincent.DataRef(data='table', field='data.col'),
        range=["#ff7f0e"])
    line.to_json(author + '.json', html_out=True, html_path=author + 'template.html')
	import nltk
	import vincent
	from curses.ascii import isdigit
	from nltk.corpus import cmudict
	import nltk.data
	import sys

	# Authors to analyse; the main loop reads "<name>.txt" for each entry.
	authors = [
	    'kerouac',
	    'joyce',
	    'hemingway',
	    'milton',
	    'dickens',
	    'fitzgerald',
	    'nabokov',
	    'vonnegut',
	]

	# Pronouncing dictionary loaded through NLTK; nsyl() looks words up in it.
	d = cmudict.dict()

	def nsyl(word, pronunciations=None):
	    """Return the number of syllables in ``word``, or 0 if it is unknown.

	    The word is lowercased, stripped of surrounding ``.,!"?;`` characters
	    and of leading hyphens, then looked up in a pronouncing dictionary
	    that maps a word to a list of pronunciations, each a sequence of
	    phoneme strings.  Every phoneme whose last character is a digit
	    counts as one syllable (CMUdict puts a trailing stress digit on
	    vowel phonemes).  When a word has several pronunciations the
	    largest count is returned.

	    Args:
	        word: Token to measure; case and surrounding punctuation are
	            ignored.
	        pronunciations: Optional mapping to look the word up in.
	            Defaults to the module-level ``d``.

	    Returns:
	        The syllable count as an int, or 0 when the normalised word is
	        not in the mapping.
	    """
	    if pronunciations is None:
	        pronunciations = d

	    lowercase = word.lower().strip('.,!"?;').lstrip('-')
	    variants = pronunciations.get(lowercase)
	    if not variants:
	        return 0

	    # sum() instead of len(filter(...)) also works on Python 3, where
	    # filter() returns an iterator; str.isdigit replaces curses.ascii.
	    return max(
	        sum(1 for phoneme in variant if phoneme[-1:].isdigit())
	        for variant in variants
	    )

	# For every author: read "<author>.txt", print one CSV row of readability
	# statistics, and write a line chart of syllables per sentence.
	for author in authors:
	    filename = author + ".txt"
	    # Context manager so the handle is closed even if reading fails.
	    with open(filename) as source:
	        text = source.read()

	    # Tokens and "."-separated fragments of at most one character are
	    # discarded.
	    words = [token for token in text.split() if len(token) > 1]
	    sentences = [chunk for chunk in text.split(".") if len(chunk) > 1]
	    if not words or not sentences:
	        # Avoid a ZeroDivisionError on an empty or punctuation-free file.
	        sys.stderr.write("skipping " + filename + ": no words or sentences found\n")
	        continue

	    # Syllables are counted once and the total reused for both figures.
	    total_syllables = sum(nsyl(token) for token in words)

	    # float() forces true division on Python 2, where int / int
	    # truncated the average sentence length.
	    words_per_sentence = float(len(words)) / len(sentences)
	    syllables_per_word = float(total_syllables) / len(words)

	    # Flesch Reading Ease score.
	    reading_level = 206.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word

	    # CSV row: author, words, sentences, words/sentence, score,
	    # syllables/word.  Prints the same on Python 2 and 3.
	    print(",".join(str(field) for field in (
	        author,
	        len(words),
	        len(sentences),
	        words_per_sentence,
	        reading_level,
	        syllables_per_word,
	    )))

	    # Syllable total of each sentence; only the first 100 are charted.
	    sentence_lengths = [
	        sum(nsyl(token) for token in sentence.split())
	        for sentence in sentences
	    ]
	    line = vincent.Line(sentence_lengths[:100])
	    line.height = 150
	    line.width = 400
	    # Ordinal colour scale whose range holds a single colour.
	    line.scales['color'] = vincent.Scale(
	        name='color', type='ordinal',
	        domain=vincent.DataRef(data='table', field='data.col'),
	        range=["#ff7f0e"])
	    line.to_json(author + '.json', html_out=True, html_path=author + 'template.html')