Skip to content

Instantly share code, notes, and snippets.

@jebyrnes
Created November 5, 2012 17:11
Show Gist options
  • Save jebyrnes/4018382 to your computer and use it in GitHub Desktop.
Save jebyrnes/4018382 to your computer and use it in GitHub Desktop.
Visualize the Hubway Data
##################################################################################################################
######
###### Code to visualize the Hubway Data Set
###### for the Hubway Data Challenge
###### http://hubwaydatachallenge.org/
######
###### Jarrett Byrnes, http://jarrettbyrnes.info
######
###### Last Updated Nov 5, 2012
##################################################################################################################
##
# Load some libraries
##
library(plyr)
library(ggplot2)
library(reshape2)
##
#Load the Data
##
# Read the two input tables from the current working directory.
# Later code in this script uses these columns:
#   trips:    start_date, end_date, duration, start_station_id, end_station_id
#   stations: id, lat, lng
trips <- read.csv(file = "./trips_time.csv")
stations <- read.csv(file = "./stations.csv")
# Look up station coordinates by station id.
#
# Args:
#   id: vector of station ids; each is matched against `stations$id`.
#
# Returns:
#   A data.frame with columns `lat` and `lng`, one row per element of `id`
#   and in the same order. Ids with no match in `stations` give NA in both
#   columns.
#
# Note: reads the `stations` table from the enclosing (global) environment.
latLong_lookup <- function(id) {
  # Row position of each requested id within the stations table (NA if absent).
  station_row <- match(id, stations$id)
  station_lat <- stations$lat[station_row]
  station_lng <- stations$lng[station_row]
  data.frame(lat = station_lat, lng = station_lng)
}
##
# extract times
##
# Parse the trip timestamps and derive an hour-of-day label for each trip.
#
# The timestamps are stored back into `trips` as POSIXct (a plain numeric
# vector with a class) rather than POSIXlt (a list of broken-down time
# fields), because list-based columns are fragile inside a data.frame that is
# later split, aggregated and bound.
#
# The hour labels are formatted from the POSIXlt parse itself, so they are
# exactly the wall-clock hour found in the input.
start_parsed <- as.POSIXlt(trips$start_date)
end_parsed <- as.POSIXlt(trips$end_date)

# "%H:00" buckets each trip into the hour in which it started / ended,
# e.g. "08:00"; start_Time is the facetting variable used further down.
trips$start_Time <- format(start_parsed, format = "%H:00")
trips$end_Time <- format(end_parsed, format = "%H:00")

trips$start_date <- as.POSIXct(start_parsed)
trips$end_date <- as.POSIXct(end_parsed)

# The broken-down copies are no longer needed.
rm(start_parsed, end_parsed)
#############
#get the information about all of the trips ever
#############
# Summarise every trip by (start station, end station) pair:
#   n.Trips       - number of trips recorded for the pair
#   mean.duration - mean of `duration` for the pair, ignoring NA values
trips_aggregated_all <- ddply(
  trips, .(start_station_id, end_station_id), summarize,
  n.Trips = length(duration),
  mean.duration = mean(duration, na.rm = TRUE)
)

# Attach coordinates for both ends of each pair. The end-station columns are
# renamed to lat.end / lng.end *before* binding, so the result does not depend
# on the positions of the columns in the aggregated table.
all_start_coords <- latLong_lookup(trips_aggregated_all$start_station_id)
all_end_coords <- latLong_lookup(trips_aggregated_all$end_station_id)
names(all_end_coords) <- paste0(names(all_end_coords), ".end")

trips_aggregated_all <- cbind(
  trips_aggregated_all,
  all_start_coords,
  all_end_coords
)
#plot it!
# Map of all traffic: one segment per (start station, end station) pair,
# coloured by log10 of the number of trips and sized by mean trip duration,
# with every station overlaid as a small black dot.
all_traffic_plot <- ggplot(
  trips_aggregated_all,
  aes(
    x = lng, y = lat,
    xend = lng.end, yend = lat.end,
    color = log10(n.Trips), size = mean.duration
  )
) +
  # Nearly transparent segments: colour only builds up where many overlap.
  geom_segment(alpha = 0.03) +
  theme_bw() +
  xlab("Longitude") + ylab("Latitude") +
  scale_color_gradient("Log of Number of Trips", low = "blue", high = "red") +
  scale_size_continuous("Trip Duration") +
  # Draw the size legend keys fully opaque; at alpha 0.03 they would be
  # close to invisible.
  guides(size = guide_legend(override.aes = list(alpha = 1))) +
  # The stations table has no lng.end / lat.end / n.Trips / mean.duration
  # columns, so this layer must not inherit the plot-level mapping.
  geom_point(
    data = stations,
    aes(x = lng, y = lat),
    inherit.aes = FALSE,
    size = 1, color = "black"
  )

# Display the plot (auto-printed when run at top level) ...
all_traffic_plot

# ... and save exactly this plot rather than whatever last_plot() returns.
ggsave("hubway_alltraffic.pdf", plot = all_traffic_plot)
########################################
#####Now aggregate by start time
########################################
# Summarise trips by (start station, end station, start hour):
#   n.Trips       - number of trips recorded for the group
#   mean.duration - mean of `duration` for the group, ignoring NA values
trips_aggregated_start <- ddply(
  trips, .(start_station_id, end_station_id, start_Time), summarize,
  n.Trips = length(duration),
  mean.duration = mean(duration, na.rm = TRUE)
)

# Attach coordinates for both ends of each group. The end-station columns are
# renamed to lat.end / lng.end *before* binding, so the result does not depend
# on the positions of the columns in the aggregated table.
hourly_start_coords <- latLong_lookup(trips_aggregated_start$start_station_id)
hourly_end_coords <- latLong_lookup(trips_aggregated_start$end_station_id)
names(hourly_end_coords) <- paste0(names(hourly_end_coords), ".end")

trips_aggregated_start <- cbind(
  trips_aggregated_start,
  hourly_start_coords,
  hourly_end_coords
)
# Same station-to-station map as the all-traffic plot, but with one facet per
# start hour (start_Time). Segment transparency also varies with the number
# of trips.
trips_by_start_time_plot <- ggplot(
  trips_aggregated_start,
  aes(
    x = lng, y = lat,
    xend = lng.end, yend = lat.end,
    # NOTE: natural log here, whereas the all-traffic plot uses log10.
    color = log(n.Trips), size = mean.duration
  )
) +
  geom_segment(mapping = aes(alpha = n.Trips)) +
  facet_wrap(~ start_Time) +
  theme_bw() +
  xlab("Longitude") + ylab("Latitude") +
  scale_color_gradient("Log of Number of Trips", low = "blue", high = "red") +
  scale_size_continuous("Trip Duration") +
  # Draw the size legend keys fully opaque so they remain visible.
  guides(size = guide_legend(override.aes = list(alpha = 1))) +
  # The stations table has no lng.end / lat.end / n.Trips / mean.duration
  # columns, so this layer must not inherit the plot-level mapping.
  geom_point(
    data = stations,
    aes(x = lng, y = lat),
    inherit.aes = FALSE,
    size = 1, color = "black"
  ) +
  ggtitle("Trips Plotted by Start Time \n") +
  # Alpha follows n.Trips but is confined to the 0.01 - 0.05 range.
  scale_alpha_continuous("Number of Trips", range = c(0.01, 0.05)) +
  # Explicit breaks: only these longitudes / latitudes are labelled.
  scale_x_continuous(breaks = c(-71.15, -71.1, -71.05)) +
  scale_y_continuous(breaks = c(42.31, 42.35, 42.37))

# Display the plot (auto-printed when run at top level) ...
trips_by_start_time_plot

# ... and save exactly this plot rather than whatever last_plot() returns.
ggsave("trips_by_start_time.pdf", plot = trips_by_start_time_plot)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment