Skip to content

Instantly share code, notes, and snippets.

@gipi
Created September 17, 2013 12:33
Show Gist options
  • Save gipi/6593731 to your computer and use it in GitHub Desktop.
Save gipi/6593731 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# inspired by <http://stackoverflow.com/questions/5821125/how-to-plot-confusion-matrix-with-string-axis-rather-than-integer-in-python>
import matplotlib.pyplot as plt
import numpy as np
import string
import sys
def generate_matrix_from_bigrams(bi):
    """Build a 26x26 integer matrix of bigram counts for the letters a-z.

    Args:
        bi: mapping ``first_char -> {second_char: count}``, as returned by
            ``calculate_bigrams``. Keys that are not lowercase ASCII
            letters are ignored; missing pairs count as 0.

    Returns:
        A ``numpy`` array ``m`` of shape (26, 26) and integer dtype where
        ``m[x, y]`` is the count stored for the bigram made of the x-th
        letter followed by the y-th letter of the alphabet.
    """
    # ``string.ascii_lowercase`` exists on both Python 2 and 3 and is not
    # locale dependent, unlike the Python-2-only ``string.lowercase``.
    letters = string.ascii_lowercase
    size = len(letters)
    m = np.zeros((size, size), dtype=int)
    for x, cx in enumerate(letters):
        first_letter_frequency = bi.get(cx, {})
        for y, cy in enumerate(letters):
            m[x, y] = first_letter_frequency.get(cy, 0)
    return m
def calculate_bigrams(text):
    """Count how often each element of ``text`` is followed by each other one.

    Args:
        text: any iterable of hashable items; typically a string, in which
            case the items are its characters.

    Returns:
        A dict mapping every item seen to a dict ``{next_item: count}``.
        Every item that appears in ``text`` is present as a key, even when
        nothing follows it (its value is then an empty dict). An empty
        input yields an empty dict.
    """
    frequencies = {}
    master = None  # first element of the bigram currently being built
    for c in text:
        # First round: there is no previous element to pair with yet.
        # Compare against None explicitly so that falsy elements (e.g. the
        # byte value 0) are not mistaken for "nothing seen so far".
        if master is None:
            master = c
            frequencies.setdefault(master, {})
            continue
        # real frequency
        followers = frequencies[master]
        followers[c] = followers.get(c, 0) + 1
        # The current element becomes the first letter of the next bigram.
        master = c
        if master not in frequencies:
            frequencies[master] = {}
    return frequencies
if __name__ == "__main__":
    # Read the whole text from standard input and plot its bigram counts
    # as a 26x26 image with one cell per (first letter, second letter) pair.
    text = sys.stdin.read()
    bigrams = calculate_bigrams(text)
    plt.imshow(generate_matrix_from_bigrams(bigrams), interpolation='nearest')
    plt.set_cmap("binary")
    plt.colorbar()
    # One tick per letter: 26 positions (0..25) for 26 labels.
    # ``string.ascii_lowercase`` is used because ``string.lowercase`` does
    # not exist on Python 3.
    letters = list(string.ascii_lowercase)
    ticks = np.arange(len(letters))
    plt.xticks(ticks, letters)
    plt.yticks(ticks, letters)
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment