Created
February 12, 2014 02:36
-
-
Save szeitlin/8949003 to your computer and use it in GitHub Desktop.
example from Lynn Root's workshop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter  # standard library; Counter is a class, hence capitalized
import csv

import matplotlib.pyplot as plt  # third-party; binds the pyplot module to the short name plt

try:
    # Legacy compatibility module; it was removed from NumPy in release 1.9.
    import numpy.numarray as na
except ImportError:
    # Modern NumPy: keep the alias `na` so existing references still resolve.
    import numpy as na
# Path of the CSV file that the visualize_* functions hand to parse().
# It is a relative path, so it is resolved against the current working directory.
MY_FILE = "../data/sample_sfpd_incident_all.csv"
def parse(raw_file, delimiter):
    """Parse a delimited text file into a list of dicts, one per data row.

    The first row is treated as the header. Every following row becomes a
    dict mapping header field -> cell value; values are left as strings.

    Args:
        raw_file: Path of the file to read.
        delimiter: Field separator passed through to csv.reader.

    Returns:
        A list of dicts in file order. An empty list is returned when the
        file contains no header row at all.
    """
    # The context manager closes the file even if reading fails part-way.
    with open(raw_file) as opened_file:
        csv_data = csv.reader(opened_file, delimiter=delimiter)

        # The built-in next() works on both Python 2.6+ and Python 3; the
        # reader's .next() method exists only on Python 2. The None default
        # turns a completely empty file into "no rows" instead of an error.
        fields = next(csv_data, None)
        if fields is None:
            return []

        # zip() stops at the shorter sequence, so cells beyond the header
        # width are dropped and short rows simply yield fewer keys.
        return [dict(zip(fields, row)) for row in csv_data]
def visualize_days():
    """Save a line graph of incident counts per weekday to "Days.png".

    Parses MY_FILE, tallies the "DayOfWeek" column, and plots the seven
    counts in Monday-to-Sunday order with abbreviated tick labels.
    """
    records = parse(MY_FILE, ",")

    # Counter consumes the generator expression and tallies each weekday name.
    tally = Counter(record["DayOfWeek"] for record in records)

    # Look the counts up in calendar order; a Counter yields 0 for a day
    # that never appears in the data.
    weekday_names = ("Monday", "Tuesday", "Wednesday", "Thursday",
                     "Friday", "Saturday", "Sunday")
    counts = [tally[name] for name in weekday_names]

    # X-axis labels, in the same order as the counts above.
    tick_labels = ("Mon", "Tues", "Wed", "Thurs", "Fri", "Sat", "Sun")

    plt.plot(counts)
    # One tick per label, positioned at 0..len-1 to match the plotted points.
    plt.xticks(range(len(tick_labels)), tick_labels)
    plt.savefig("Days.png")
    # Clear the current figure so a later plot starts from a blank canvas.
    plt.clf()
    # TODO: add a y-axis label.
def visualize_type():
    """Save a bar chart of incident counts per crime category to "Type.png".

    Parses MY_FILE, tallies the "Category" column, and draws one bar per
    distinct category with vertical tick labels centred under each bar.
    """
    data_file = parse(MY_FILE, ",")
    counter = Counter(item["Category"] for item in data_file)

    # Categories are discovered from the data, so none are hard-coded.
    labels = tuple(counter.keys())
    # Build the heights from the labels so the two stay paired, and as a real
    # list: on Python 3 counter.values() is a dict view, not a sequence.
    heights = [counter[label] for label in labels]

    width = 0.5  # bar width in x-axis data units
    # Left edge of each bar; the 0.5 offset was chosen empirically.
    # Plain lists are used so the block does not depend on numpy.numarray.
    xlocations = [index + 0.5 for index in range(len(labels))]

    # align="edge" makes xlocations the bars' left edges on every matplotlib
    # version (the default became "center" in matplotlib 2.0), which is what
    # the tick-centring arithmetic below assumes.
    plt.bar(xlocations, heights, width=width, align="edge")
    # Put each tick at the middle of its bar and rotate labels to vertical.
    plt.xticks([left + width / 2 for left in xlocations], labels, rotation=90)
    # Leave room at the bottom so the rotated labels are not cut off.
    plt.subplots_adjust(bottom=0.4)
    # NOTE(review): rcParams only affects figures created after this point;
    # the current figure already exists, so this likely does not resize
    # Type.png. Kept as-is to preserve output; confirm the intent before
    # moving it or switching to the figure's set_size_inches().
    plt.rcParams["figure.figsize"] = 12, 8
    plt.savefig("Type.png")
    # Clear the current figure so a later plot starts from a blank canvas.
    plt.clf()
    # TODO: add a y-axis label.
def main():
    """Script entry point: render the crime-category bar chart.

    The weekday line graph is currently switched off; re-enable the
    commented call below to produce it as well.
    """
    # visualize_days()
    visualize_type()
# Run main() only when this file is executed as a script, not when it is imported.
if __name__== "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment