Skip to content

Instantly share code, notes, and snippets.

@emres
Created May 23, 2010 10:23
Show Gist options
  • Save emres/410825 to your computer and use it in GitHub Desktop.
Save emres/410825 to your computer and use it in GitHub Desktop.
from math import log

import matplotlib.pyplot as plt
# Plot observed word frequencies against the curve predicted by Zipf's law,
# both on log-log axes.
#
# "top100.txt" is read as one integer frequency per line.
# NOTE(review): the code treats the first entry as the top-ranked (most
# frequent) word, so the file is presumably sorted in descending order of
# frequency -- confirm against the data file.
with open("top100.txt") as frequency_file:
    # Skip blank lines (e.g. a trailing empty line), which int() cannot parse.
    word_frequencies = [int(line) for line in frequency_file if line.strip()]

# Ranks are 1-based: the first frequency in the file has rank 1.
ranks = range(1, len(word_frequencies) + 1)

# Ideal Zipf curve: the word of rank r gets the top frequency divided by r.
# Indexing stays inside the comprehension so an empty file yields an empty
# list instead of raising IndexError.
word_freq_ideal = [word_frequencies[0] / rank for rank in ranks]

# Both curves share the same x values, so compute them once.
log_ranks = [log(rank) for rank in ranks]

plt.plot(
    log_ranks,
    [log(freq) for freq in word_frequencies],
    label="Top 100 Dutch words",
)
plt.plot(
    log_ranks,
    [log(freq) for freq in word_freq_ideal],
    label="1/freq: ideal case",
)
plt.title("Top 100 Dutch words compared to Zipf's Law")
plt.xlabel("log(Rank of word)")
plt.ylabel("log(Frequency of the word)")
# Labels are attached to the plotted lines above; the location must be passed
# by keyword, since a second positional argument is interpreted as labels.
plt.legend(loc="upper right")
plt.grid(True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment