fnielsen/brownzipf.py

## brownzipf.py
from __future__ import division
from itertools import *
from pylab import *
from nltk.corpus import brown
from string import lower
from collections import Counter


# The data: token counts from the Brown corpus
tokens_with_count = Counter(imap(lower, brown.words()))
counts = array(tokens_with_count.values())
tokens = tokens_with_count.keys()

# A Zipf plot
ranks = arange(1, len(counts)+1)
indices = argsort(-counts)
frequencies = counts[indices]
loglog(ranks, frequencies, marker=".")
title("Zipf plot for Brown corpus tokens")
xlabel("Frequency rank of token")
ylabel("Absolute frequency of token")
grid(True)
for n in list(logspace(-0.5, log10(len(counts)), 20).astype(int)):
    dummy = text(ranks[n], frequencies[n], " " + tokens[indices[n]],
                 verticalalignment="bottom",
                 horizontalalignment="left")

show()
	from __future__ import division
	from itertools import *
	from pylab import *
	from nltk.corpus import brown
	from string import lower
	from collections import Counter


	# The data: token counts from the Brown corpus
	tokens_with_count = Counter(imap(lower, brown.words()))
	counts = array(tokens_with_count.values())
	tokens = tokens_with_count.keys()

	# A Zipf plot
	ranks = arange(1, len(counts)+1)
	indices = argsort(-counts)
	frequencies = counts[indices]
	loglog(ranks, frequencies, marker=".")
	title("Zipf plot for Brown corpus tokens")
	xlabel("Frequency rank of token")
	ylabel("Absolute frequency of token")
	grid(True)
	for n in list(logspace(-0.5, log10(len(counts)), 20).astype(int)):
	dummy = text(ranks[n], frequencies[n], " " + tokens[indices[n]],
	verticalalignment="bottom",
	horizontalalignment="left")

	show()