# ConstantineLignos/occupy_zipf.py

## occupy_zipf.py
from collections import Counter
import nltk

# Fraction of the most frequent word types to measure (0.01 == top 1%).
TOP_PERCENT = 0.01

def prob_mass_top(counts, n):
    """Return the fraction of all tokens covered by the n most common types.

    Args:
        counts: A collections.Counter mapping each type to its token count.
        n: How many of the most frequent types to include.

    Returns:
        A float: the summed count of the n most common entries divided by
        the summed count of all entries, or 0.0 when the counter's counts
        sum to zero.
    """
    # Take the denominator from the argument, not from a module-level
    # variable, so the function works on any Counter it is handed.
    total = sum(counts.values())
    if not total:
        # Nothing was counted; avoid dividing by zero.
        return 0.0
    top = sum(freq for _, freq in counts.most_common(n))
    # float() keeps this a true division under Python 2 as well.
    return top / float(total)

# Lower-case every Brown corpus word so case variants count as one type.
count = Counter(word.lower() for word in nltk.corpus.brown.words())

# Report the share of tokens covered by the top TOP_PERCENT of types.
# A parenthesized single argument prints the same text under Python 2 and
# Python 3, where the bare print statement is a SyntaxError.
print("Top %d%% of types account for %2.1f%% of tokens" %
      (TOP_PERCENT * 100,
       prob_mass_top(count, int(len(count) * TOP_PERCENT)) * 100))
	from collections import Counter
	import nltk

	# Fraction of the most frequent word types to measure (0.01 == top 1%).
	TOP_PERCENT = 0.01

	def prob_mass_top(counts, n):
		"""Return the fraction of all tokens covered by the n most common types.

		Args:
			counts: A collections.Counter mapping each type to its token count.
			n: How many of the most frequent types to include.

		Returns:
			A float: the summed count of the n most common entries divided by
			the summed count of all entries, or 0.0 when the counter's counts
			sum to zero.
		"""
		# Take the denominator from the argument, not from a module-level
		# variable, so the function works on any Counter it is handed.
		total = sum(counts.values())
		if not total:
			# Nothing was counted; avoid dividing by zero.
			return 0.0
		top = sum(freq for _, freq in counts.most_common(n))
		# float() keeps this a true division under Python 2 as well.
		return top / float(total)

	# Lower-case every Brown corpus word so case variants count as one type.
	count = Counter(word.lower() for word in nltk.corpus.brown.words())

	# Report the share of tokens covered by the top TOP_PERCENT of types.
	# A parenthesized single argument prints the same text under Python 2 and
	# Python 3, where the bare print statement is a SyntaxError.
	print("Top %d%% of types account for %2.1f%% of tokens" %
		(TOP_PERCENT * 100,
		prob_mass_top(count, int(len(count) * TOP_PERCENT)) * 100))