# arm5077/potholes.r

## potholes.r
library(ggplot2)

# Read potholes.csv. Text columns are converted to factors, which is the layout
# the rest of this script works with.
potholes <- read.csv(
  file = "potholes.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Parse the three timestamp columns ("month/day/year hour:minute", interpreted
# in the session's time zone). They are stored as POSIXct rather than POSIXlt:
# POSIXlt is a list of date-time components, which is awkward as a data frame
# column, while POSIXct is a plain numeric vector. Values that do not match
# the format become NA.
potholes$COMPOSEDATE <- as.POSIXct(
  strptime(potholes$COMPOSEDATE, "%m/%d/%Y %H:%M")
)
potholes$RESOLVEDDATE <- as.POSIXct(
  strptime(potholes$RESOLVEDDATE, "%m/%d/%Y %H:%M")
)
potholes$COMPLETED_DATE <- as.POSIXct(
  strptime(potholes$COMPLETED_DATE, "%m/%d/%Y %H:%M")
)

# COMPLETED_DATE minus COMPOSEDATE, expressed in days (a difftime vector).
# It is NA wherever either timestamp is NA.
potholes$DAYS_UNTIL_COMPLETED <- difftime(
  potholes$COMPLETED_DATE,
  potholes$COMPOSEDATE,
  units = "days"
)

# Keep only the rows that have a completion time, i.e. where
# DAYS_UNTIL_COMPLETED is not NA (the potholes that were filled).
completedPotholes <- potholes[!is.na(potholes$DAYS_UNTIL_COMPLETED), ]

# In the subset, turn the difftime (already measured in days) into a plain
# numeric vector.
completedPotholes[["DAYS_UNTIL_COMPLETED"]] <- as.numeric(
  completedPotholes[["DAYS_UNTIL_COMPLETED"]]
)

# Median days-to-completion per council district (note: median, not mean).
# One row per district value found in completedPotholes; the row names are the
# district values.
days_by_district <- split(
  completedPotholes$DAYS_UNTIL_COMPLETED,
  completedPotholes$COUNCILDISTRICT
)
districts <- data.frame(
  days = vapply(days_by_district, median, numeric(1))
)

# Take the district number from the group labels instead of assuming that
# exactly districts 0-9 are present, in that order.
districts$district <- as.numeric(names(days_by_district))

# Council member(s) per district, looked up by district number so that a
# missing district cannot shift the names onto the wrong rows. Districts that
# are not in the table get NA.
council_members <- c(
  "0" = "None",
  "1" = "Darlene Harris",
  "2" = "Theresa Kail-Smith",
  "3" = "Bruce Kraus",
  "4" = "Natalia Rudiak",
  "5" = "Corey O'Connor",
  "6" = "Daniel Lavelle",
  "7" = "Deborah Gross/Pat Dowd",
  "8" = "Dan Gilman/Bill Peduto",
  "9" = "Ricky Burgess"
)
districts$councilMember <- unname(council_members[names(days_by_district)])

# Number of completed potholes per district, counted from the same grouping
# so the counts always line up with the rows.
districts$total <- vapply(
  days_by_district, length, integer(1),
  USE.NAMES = FALSE
)
rm(days_by_district, council_members)

# Rows with no completion time (DAYS_UNTIL_COMPLETED is NA).
uncompletedPotholes <- potholes[is.na(potholes$DAYS_UNTIL_COMPLETED), ]

# Count the uncompleted rows per council district.
uncompletedTotal <- as.data.frame(table(uncompletedPotholes$COUNCILDISTRICT))
colnames(uncompletedTotal) <- c("district", "unfilled")

# Attach the counts to the district summary. all.x = TRUE keeps districts that
# have no uncompleted rows (an inner merge would silently drop them); their
# count is then set to 0.
districts <- merge(districts, uncompletedTotal, by = "district", all.x = TRUE)
districts$unfilled[is.na(districts$unfilled)] <- 0L

# Show the district summary ordered by median days (ascending). `districts`
# itself keeps the merge order. print() makes the table appear even when the
# script is run through source(), where bare top-level expressions are not
# auto-printed.
print(districts[order(districts$days), ])

# Read the 2009 election results and make sure the ward column is a factor.
election <- read.csv(
  file = "2009-election-results.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
election$ward <- as.factor(election$ward)

# Pair each pothole row (ward, council district, days until completed) with
# the election rows of the same ward. Only wards present in both tables are
# kept.
electedPotholes <- merge(
  data.frame(
    ward = potholes$WARD,
    councildistrict = potholes$COUNCILDISTRICT,
    days = potholes$DAYS_UNTIL_COMPLETED
  ),
  election,
  by = "ward"
)

# Median days-to-completion per DPW division (grouped by REFERTODEPT).
# One row per division, with the division as the row name. The column is
# named "days", matching the district summary, instead of being left with an
# auto-generated name.
divisions <- data.frame(
  days = vapply(
    split(
      completedPotholes$DAYS_UNTIL_COMPLETED,
      completedPotholes$REFERTODEPT
    ),
    median,
    numeric(1)
  )
)

# Tally rows per primary street name, most frequent first. Ties keep the
# order in which the table lists them.
streets <- as.data.frame(
  table(street = potholes$PRIMARYSTREETNAME),
  responseName = "freq"
)
streets <- streets[order(-streets[["freq"]]), ]
	library(ggplot2)

	# NOTE(review): this tab-indented section repeats the statements at the top
	# of the file, so everything is loaded and computed a second time. Confirm
	# whether the duplication is intentional.

	# Read potholes.csv. Text columns are converted to factors, which is the
	# layout the rest of this script works with.
	potholes <- read.csv(
	  file = "potholes.csv",
	  header = TRUE,
	  sep = ",",
	  stringsAsFactors = TRUE
	)

	# Parse the three timestamp columns ("month/day/year hour:minute",
	# interpreted in the session's time zone). They are stored as POSIXct rather
	# than POSIXlt: POSIXlt is a list of date-time components, which is awkward
	# as a data frame column, while POSIXct is a plain numeric vector. Values
	# that do not match the format become NA.
	potholes$COMPOSEDATE <- as.POSIXct(
	  strptime(potholes$COMPOSEDATE, "%m/%d/%Y %H:%M")
	)
	potholes$RESOLVEDDATE <- as.POSIXct(
	  strptime(potholes$RESOLVEDDATE, "%m/%d/%Y %H:%M")
	)
	potholes$COMPLETED_DATE <- as.POSIXct(
	  strptime(potholes$COMPLETED_DATE, "%m/%d/%Y %H:%M")
	)

	# COMPLETED_DATE minus COMPOSEDATE, expressed in days (a difftime vector).
	# It is NA wherever either timestamp is NA.
	potholes$DAYS_UNTIL_COMPLETED <- difftime(
	  potholes$COMPLETED_DATE,
	  potholes$COMPOSEDATE,
	  units = "days"
	)

	# Keep only the rows that have a completion time, i.e. where
	# DAYS_UNTIL_COMPLETED is not NA (the potholes that were filled).
	completedPotholes <- potholes[!is.na(potholes$DAYS_UNTIL_COMPLETED), ]

	# In the subset, turn the difftime (already measured in days) into a plain
	# numeric vector.
	completedPotholes[["DAYS_UNTIL_COMPLETED"]] <- as.numeric(
	  completedPotholes[["DAYS_UNTIL_COMPLETED"]]
	)

	# Median days-to-completion per council district (note: median, not mean).
	# One row per district value found in completedPotholes; the row names are
	# the district values.
	days_by_district <- split(
	  completedPotholes$DAYS_UNTIL_COMPLETED,
	  completedPotholes$COUNCILDISTRICT
	)
	districts <- data.frame(
	  days = vapply(days_by_district, median, numeric(1))
	)

	# Take the district number from the group labels instead of assuming that
	# exactly districts 0-9 are present, in that order.
	districts$district <- as.numeric(names(days_by_district))

	# Council member(s) per district, looked up by district number so that a
	# missing district cannot shift the names onto the wrong rows. Districts
	# that are not in the table get NA.
	council_members <- c(
	  "0" = "None",
	  "1" = "Darlene Harris",
	  "2" = "Theresa Kail-Smith",
	  "3" = "Bruce Kraus",
	  "4" = "Natalia Rudiak",
	  "5" = "Corey O'Connor",
	  "6" = "Daniel Lavelle",
	  "7" = "Deborah Gross/Pat Dowd",
	  "8" = "Dan Gilman/Bill Peduto",
	  "9" = "Ricky Burgess"
	)
	districts$councilMember <- unname(council_members[names(days_by_district)])

	# Number of completed potholes per district, counted from the same grouping
	# so the counts always line up with the rows.
	districts$total <- vapply(
	  days_by_district, length, integer(1),
	  USE.NAMES = FALSE
	)
	rm(days_by_district, council_members)

	# Rows with no completion time (DAYS_UNTIL_COMPLETED is NA).
	uncompletedPotholes <- potholes[is.na(potholes$DAYS_UNTIL_COMPLETED), ]

	# Count the uncompleted rows per council district.
	uncompletedTotal <- as.data.frame(table(uncompletedPotholes$COUNCILDISTRICT))
	colnames(uncompletedTotal) <- c("district", "unfilled")

	# Attach the counts to the district summary. all.x = TRUE keeps districts
	# that have no uncompleted rows (an inner merge would silently drop them);
	# their count is then set to 0.
	districts <- merge(districts, uncompletedTotal, by = "district", all.x = TRUE)
	districts$unfilled[is.na(districts$unfilled)] <- 0L

	# Show the district summary ordered by median days (ascending). `districts`
	# itself keeps the merge order. print() makes the table appear even when the
	# script is run through source(), where bare top-level expressions are not
	# auto-printed.
	print(districts[order(districts$days), ])

	# Read the 2009 election results and make sure the ward column is a factor.
	election <- read.csv(
	  file = "2009-election-results.csv",
	  header = TRUE,
	  sep = ",",
	  stringsAsFactors = TRUE
	)
	election$ward <- as.factor(election$ward)

	# Pair each pothole row (ward, council district, days until completed) with
	# the election rows of the same ward. Only wards present in both tables are
	# kept.
	electedPotholes <- merge(
	  data.frame(
	    ward = potholes$WARD,
	    councildistrict = potholes$COUNCILDISTRICT,
	    days = potholes$DAYS_UNTIL_COMPLETED
	  ),
	  election,
	  by = "ward"
	)

	# Median days-to-completion per DPW division (grouped by REFERTODEPT).
	# One row per division, with the division as the row name. The column is
	# named "days", matching the district summary, instead of being left with an
	# auto-generated name.
	divisions <- data.frame(
	  days = vapply(
	    split(
	      completedPotholes$DAYS_UNTIL_COMPLETED,
	      completedPotholes$REFERTODEPT
	    ),
	    median,
	    numeric(1)
	  )
	)

	# Tally rows per primary street name, most frequent first. Ties keep the
	# order in which the table lists them.
	streets <- as.data.frame(
	  table(street = potholes$PRIMARYSTREETNAME),
	  responseName = "freq"
	)
	streets <- streets[order(-streets[["freq"]]), ]