Skip to content

Instantly share code, notes, and snippets.

@arm5077
Created March 10, 2014 21:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arm5077/9474352 to your computer and use it in GitHub Desktop.
Save arm5077/9474352 to your computer and use it in GitHub Desktop.
Potholes in Pittsburgh
library(ggplot2)
# Load the pothole request export; text columns are read in as factors.
potholes <- read.csv(
  file = "potholes.csv", header = TRUE, sep = ",", stringsAsFactors = TRUE
)
# Parse the timestamp columns (month/day/year hour:minute). strptime() returns
# POSIXlt, a list-based class that is awkward inside a data frame, so each
# column is stored as POSIXct instead. Values that do not match the format
# become NA.
potholes$COMPOSEDATE <- as.POSIXct(
  strptime(potholes$COMPOSEDATE, "%m/%d/%Y %H:%M")
)
potholes$RESOLVEDDATE <- as.POSIXct(
  strptime(potholes$RESOLVEDDATE, "%m/%d/%Y %H:%M")
)
potholes$COMPLETED_DATE <- as.POSIXct(
  strptime(potholes$COMPLETED_DATE, "%m/%d/%Y %H:%M")
)
# Elapsed time from request to completion, as a difftime in days; NA when
# either timestamp is missing or failed to parse.
potholes$DAYS_UNTIL_COMPLETED <- difftime(
  potholes$COMPLETED_DATE, potholes$COMPOSEDATE,
  units = "days"
)
# Keep only the requests that have a computed completion time, i.e. the
# potholes that were actually filled
completedPotholes <- potholes[!is.na(potholes[["DAYS_UNTIL_COMPLETED"]]), ]
# In this subset, store the elapsed time as a plain number of days rather
# than a difftime
completedPotholes[["DAYS_UNTIL_COMPLETED"]] <- as.numeric(
  completedPotholes[["DAYS_UNTIL_COMPLETED"]],
  units = "days"
)
# Break out the median days-to-fill per council district
daysByDistrict <- split(
  completedPotholes$DAYS_UNTIL_COMPLETED,
  completedPotholes$COUNCILDISTRICT
)
# vapply() (unlike sapply()) always yields exactly one numeric per district
districts <- as.data.frame(vapply(daysByDistrict, median, numeric(1)))
colnames(districts) <- "days"
# Read the district numbers off the group labels instead of assuming the
# groups are exactly 0-9 in that order
districts$district <- as.numeric(rownames(districts))
# Look council members up by district number, so a missing or extra district
# cannot shift every name onto the wrong row; unknown districts get NA
councilMembers <- c(
  "0" = "None",
  "1" = "Darlene Harris",
  "2" = "Theresa Kail-Smith",
  "3" = "Bruce Kraus",
  "4" = "Natalia Rudiak",
  "5" = "Corey O'Connor",
  "6" = "Daniel Lavelle",
  "7" = "Deborah Gross/Pat Dowd",
  "8" = "Dan Gilman/Bill Peduto",
  "9" = "Ricky Burgess"
)
districts$councilMember <- unname(
  councilMembers[as.character(districts$district)]
)
# Add totals of potholes filled, counted from the same grouping as the
# medians so the two columns are aligned by construction
districts$total <- unname(vapply(daysByDistrict, length, integer(1)))
# Drop the temporaries so only `districts` is left behind
rm(daysByDistrict, councilMembers)
# For kicks, let's make an "uncompleted potholes" list: every request with no
# computed completion time
uncompletedPotholes <- potholes[is.na(potholes$DAYS_UNTIL_COMPLETED), ]
# Find uncompleted total per district
uncompletedTotal <- as.data.frame(table(uncompletedPotholes$COUNCILDISTRICT))
# Rename fields
colnames(uncompletedTotal) <- c("district", "unfilled")
# Left join: table() only lists districts that have at least one unfilled
# pothole, so the default inner merge would drop every other district from
# the summary. Keep them all and record a count of zero instead.
districts <- merge(districts, uncompletedTotal, by = "district", all.x = TRUE)
districts$unfilled[is.na(districts$unfilled)] <- 0L
# Show the districts ordered by median days to fill, fastest first.
# print() is explicit so the table also appears when the script is run via
# source(), where a bare top-level expression is evaluated silently.
# `districts` itself keeps its existing row order.
print(districts[order(districts$days), ])
# Bring in election totals
election <- read.csv(
  file = "2009-election-results.csv", header = TRUE, sep = ",",
  stringsAsFactors = TRUE
)
# Treat ward as a categorical key rather than a number
election$ward <- as.factor(election$ward)
# One row per pothole request: its ward, council district and time until
# completion (NA when no completion time could be computed)
electedPotholes <- data.frame(
  ward = potholes$WARD,
  councildistrict = potholes$COUNCILDISTRICT,
  days = potholes$DAYS_UNTIL_COMPLETED
)
# Attach the election columns by ward. This is an inner join, so requests
# whose ward has no row in the election file are dropped.
electedPotholes <- merge(electedPotholes, election, by = "ward")
# Make new dataframe of median pothole-filling time by DPW division
# (REFERTODEPT), one row per division with the division as the row name.
# vapply() (unlike sapply()) always yields exactly one numeric per group.
divisions <- as.data.frame(
  vapply(
    split(
      completedPotholes$DAYS_UNTIL_COMPLETED,
      completedPotholes$REFERTODEPT
    ),
    median,
    numeric(1)
  )
)
# Name the column like the per-district summary does, instead of leaving it
# with an auto-generated name derived from the expression above
colnames(divisions) <- "days"
# Tally pothole complaints per street name, then rank the streets from most
# to fewest complaints
streets <- as.data.frame(
  table(street = potholes[["PRIMARYSTREETNAME"]]),
  responseName = "freq"
)
streets <- streets[order(-streets[["freq"]]), ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment