Created
August 14, 2016 17:32
-
-
Save mgarod/72f2e95d162780fd04c58a4f98ed0d2a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Title: charfreq.py
# Author: Michael Gard
# Date Created: 8/14/16
# Description: Given the plain-text resume "pt_MG_Resume.txt", output a Seaborn bar plot of individual character frequency. Bar colors are determined by creating one "color bucket" for each of the 26 letters; higher frequencies get darker colors.
import seaborn as sns | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import string | |
def make_heat_palette(vals, pal):
    """Pick one palette entry per value, scaled by the value's position in the range.

    Each value is min-max normalised to [0, 1], multiplied by
    ``len(vals) - 1`` and truncated to an int, and that int is used as an
    index into ``pal``. The smallest value therefore gets ``pal[0]`` and the
    largest gets ``pal[len(vals) - 1]``.

    Args:
        vals: Iterable of numbers (a list, dict view, generator, ...).
        pal: Indexable sequence of colours; must have at least
            ``len(vals)`` entries.

    Returns:
        A list with one element of ``pal`` per element of ``vals``, in the
        same order as ``vals``. Empty input yields an empty list. When all
        values are equal there is no range to scale over, so every value
        gets ``pal[0]``.
    """
    # Materialise once: len(), min(), max() and the final pass all need the
    # data, and a one-shot iterator would be exhausted after the first use.
    vals = list(vals)
    if not vals:
        return []

    maxindex = len(vals) - 1
    minv = min(vals)
    maxv = max(vals)
    denom = float(maxv - minv)
    if denom == 0:
        # Flat distribution: avoid dividing by zero.
        return [pal[0]] * len(vals)

    return [pal[int(((x - minv) / denom) * maxindex)] for x in vals]
# Count how often each letter A-Z occurs in the resume, ignoring case.
# string.ascii_uppercase is available on both Python 2 and Python 3, whereas
# string.uppercase exists only on Python 2.
results = {c: 0 for c in string.ascii_uppercase}
with open("pt_MG_Resume.txt") as f:
    for line in f:
        for i in line.upper():
            # Anything that is not one of the 26 letters is skipped.
            if i in results:
                results[i] += 1

# Build parallel lists in explicit alphabetical order instead of relying on
# dict ordering, so bars always appear A..Z and each frequency lines up with
# its letter.
chars = list(string.ascii_uppercase)
freqs = [results[c] for c in chars]

# One shade of red per letter; make_heat_palette then assigns darker shades
# to the more frequent letters.
red_pal = sns.color_palette("Reds", len(freqs))
custom_pal = make_heat_palette(freqs, red_pal)

# Pass x/y by keyword: accepted by old seaborn releases and required by
# recent ones, which no longer take the data vectors positionally.
sns.barplot(x=chars, y=freqs, palette=custom_pal)
plt.title('Character Frequency of MG Resume')
plt.xlabel('Character')
plt.ylabel('Frequency')
# Use the pyplot module imported by this file; newer seaborn releases no
# longer expose it as the `sns.plt` attribute.
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment