Skip to content

Instantly share code, notes, and snippets.

@szeitlin
Created February 12, 2014 02:36
Show Gist options
  • Save szeitlin/8949003 to your computer and use it in GitHub Desktop.
Save szeitlin/8949003 to your computer and use it in GitHub Desktop.
example from Lynn Root's workshop.
from collections import Counter #collections is a standard library, Counter is capitalized
import csv
import matplotlib.pyplot as plt #renames the pyplot function from third party library
import numpy.numarray as na # best practices: import in alphabetical order
# Path of the CSV file that the visualize_* functions hand to parse().
# It is a relative path, so it resolves against the current working directory.
MY_FILE = "../data/sample_sfpd_incident_all.csv"
def parse(raw_file, delimiter):
    """Parse a delimited text file into a list of row dictionaries.

    The first row of the file is treated as the header. Every following
    row becomes one dict that maps each header field to the value found
    in the same column of that row.

    Args:
        raw_file: Path of the file to read.
        delimiter: One-character string separating the columns.

    Returns:
        A list with one dict per data row, in file order. The list is
        empty when the file has no rows at all or only a header row.
    """
    # The context manager closes the file even when reading or parsing raises.
    with open(raw_file) as opened_file:
        csv_data = csv.reader(opened_file, delimiter=delimiter)

        # The next() builtin works on Python 2.6+ and Python 3, unlike the
        # Python-2-only reader.next() method. The None default keeps an
        # empty file from raising StopIteration.
        fields = next(csv_data, None)
        if fields is None:
            return []

        # zip() pairs each header field with the value in the same column;
        # it stops at the shorter of the two, so ragged rows do not raise.
        return [dict(zip(fields, row)) for row in csv_data]
def visualize_days():
    """Plot the number of records per day of the week as a line graph.

    Reads MY_FILE through parse(), tallies the "DayOfWeek" value of every
    row, and saves a line plot of the Monday-to-Sunday totals to
    "Days.png". Returns nothing.
    """
    # Full names as they are looked up in the data, in calendar order, and
    # the abbreviations shown on the x-axis in the same order.
    day_names = ("Monday", "Tuesday", "Wednesday", "Thursday",
                 "Friday", "Saturday", "Sunday")
    tick_labels = ("Mon", "Tues", "Wed", "Thurs", "Fri", "Sat", "Sun")

    # Counter consumes a generator expression, so no intermediate list of
    # day names is built.
    tally = Counter(record["DayOfWeek"] for record in parse(MY_FILE, ","))

    # A Counter returns 0 for a missing key, so a day that never occurs
    # is plotted as zero instead of raising KeyError.
    counts = [tally[day] for day in day_names]

    plt.plot(counts)
    # One tick per label: positions 0..6 paired with the abbreviations.
    plt.xticks(range(len(tick_labels)), tick_labels)
    plt.savefig("Days.png")
    plt.clf()  # clear the current figure so a later plot starts empty
    # TODO: add y-axis labels.
def visualize_type():
    """Plot the number of records per category as a bar graph.

    Reads MY_FILE through parse(), tallies the "Category" value of every
    row, and saves a bar chart with one bar per distinct category to
    "Type.png". Returns nothing.
    """
    data_file = parse(MY_FILE, ",")
    counter = Counter(item["Category"] for item in data_file)

    # Categories are taken from the data, so they need not be known ahead
    # of time. The heights are materialised as a list in the same order as
    # the labels, rather than passing a dict view to matplotlib.
    labels = tuple(counter)
    heights = [counter[label] for label in labels]

    width = 0.5
    # Bar centres at 0.5, 1.5, 2.5, ... computed with plain Python, so the
    # function no longer depends on the removed numpy.numarray module.
    xlocations = [index + 0.5 for index in range(len(labels))]

    # The default figure size only applies to figures created after it is
    # set, so it has to be assigned before the first plotting call.
    plt.rcParams["figure.figsize"] = 12, 8

    # align="center" is explicit because the default alignment of bar()
    # differs between matplotlib versions. With centred bars the tick
    # labels sit at the bar positions themselves.
    plt.bar(xlocations, heights, width=width, align="center")
    plt.xticks(xlocations, labels, rotation=90)  # vertical labels
    plt.subplots_adjust(bottom=0.4)  # leave room so the labels are not cut off
    plt.savefig("Type.png")
    plt.clf()  # clear the current figure so a later plot starts empty
    # TODO: add y-axis labels.
def main():
    """Run the visualizations this script is currently set up to produce."""
    # Only the per-category bar chart is generated here; visualize_days()
    # is available but is not called.
    visualize_type()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment