# Skip to content

# Instantly share code, notes, and snippets.

# Embed
# What would you like to do?
# R script to analyze personal Citibike data
library(ggplot2)
library(lubridate)
## Work from the project folder so the relative file names below resolve
setwd(dir = "~/Dropbox (Personal)/Personal/Github/Citibike")
## Load the trip data; text columns stay character vectors (no factors)
r <- read.csv(file = "trips.csv", header = TRUE, stringsAsFactors = FALSE)
head(r, n = 6L)
## Derive each trip's duration in minutes (duration columns divided by 60)
r[["actual_mins"]] <- r[["actual_duration"]] / 60
r[["estimated_mins"]] <- r[["estimated_duration"]] / 60
## Keep only trips shorter than 60 minutes and save that version;
## my trips of 60 minutes or more are errors
r <- subset(r, actual_mins < 60)
write.csv(x = r, file = "miles_trips_edits.csv", row.names = FALSE)
## Relative gap between estimated and actual duration, as a fraction of the
## estimate (positive = the trip took less time than estimated).
## Rows whose estimate is 0 produce Inf/NaN here.
r$time_diff_perc <- (r$estimated_mins - r$actual_mins) / r$estimated_mins
## Convert each trip's estimated distance from meters to miles
## (1 international mile = 1609.344 m exactly)
r$miles <- r$estimated_distance / 1609.344
## Average speed in MPH for each trip (miles per minute * 60)
r$mph <- (r$miles / r$actual_mins) * 60
## Correcting for errors: keep only trips with a plausible speed, strictly
## between 0 and 25 mph. which() also drops rows whose mph is NA/NaN.
## No separate duration cap is needed here: trips of 60 minutes or more
## were already removed above.
r <- r[which(r$mph > 0 & r$mph < 25), ]
## Summary stats
## Distance: total, overall distribution, and distribution of positive values
sum(r[["miles"]])
summary(r[["miles"]])
summary(with(r, miles[which(miles > 0)]))
## Time: total actual and estimated minutes, then the actual distribution
sum(r[["actual_mins"]])
sum(r[["estimated_mins"]])
summary(r[["actual_mins"]])
## Estimated-vs-actual gap scaled to percent, finite values only
summary(with(r, time_diff_perc[which(is.finite(time_diff_perc))])) * 100
## Speed: all trips, then only trips under 20 mph
summary(r[["mph"]])
summary(with(r, mph[which(mph < 20)]))
## Parse the start/end times into POSIXct
## (format: month/day/year, 12-hour clock with AM/PM)
r$start_date <- as.POSIXct(
  r$start_time,
  format = "%m/%d/%Y %I:%M:%S %p",
  origin = "1970-01-01"
)
r$end_date <- as.POSIXct(
  r$end_time,
  format = "%m/%d/%Y %I:%M:%S %p",
  origin = "1970-01-01"
)
# Time of day as minutes since midnight, for trip end and trip start
r$end_date_mins <- 60 * hour(r$end_date) + minute(r$end_date)
r$start_date_mins <- 60 * hour(r$start_date) + minute(r$start_date)
## Weekday trips: wday() values 2 through 6
## (Monday-Friday under lubridate's default Sunday = 1 numbering)
weekday <- r[wday(r$start_date) %in% 2:6, ]
## Morning commute: weekday trips starting in hours 7-10 that end at the
## Lafayette St & Jersey St station
morn_commute <- weekday[hour(weekday$start_date) %in% 7:10, ]
morn_commute <- morn_commute[
  morn_commute$end_station %in% "Lafayette St & Jersey St",
]
## Weekday evening trips: those starting in hours 18-23
weekday_evening <- weekday[hour(weekday$start_date) %in% 18:23, ]
## Evening trips leaving Lafayette St & Jersey St, and evening trips
## arriving at Greenwich Ave & Charles St
eve_commute <- weekday_evening[
  weekday_evening$start_station %in% "Lafayette St & Jersey St",
]
eve_commute_home <- weekday_evening[
  weekday_evening$end_station %in% "Greenwich Ave & Charles St",
]
## Speed summaries: morning commute, all weekday-evening trips, evening commute
summary(morn_commute[["mph"]])
summary(weekday_evening[["mph"]])
summary(eve_commute[["mph"]])
## Morning commute: end time of day in hours, then the share of trips
## ending after 9 and the share ending at or before 8
summary(morn_commute[["end_date_mins"]]) / 60
sum(morn_commute$end_date_mins / 60 > 9, na.rm = TRUE) / nrow(morn_commute)
sum(morn_commute$end_date_mins / 60 <= 8, na.rm = TRUE) / nrow(morn_commute)
## Evening commute: start time of day in hours and the share of trips
## starting after 20; then, for trips ending at Greenwich Ave & Charles St,
## the end time of day in hours and the share ending after 22
summary(eve_commute[["start_date_mins"]]) / 60
sum(eve_commute$start_date_mins / 60 > 20, na.rm = TRUE) / nrow(eve_commute)
summary(eve_commute_home[["end_date_mins"]]) / 60
sum(eve_commute_home$end_date_mins / 60 > 22, na.rm = TRUE) /
  nrow(eve_commute_home)
## Label each trip for the graphic: "Workday" when wday() of the start date is
## 2 through 6 (Monday-Friday under lubridate's default Sunday = 1 numbering),
## otherwise "Weekend". Trips whose start_date is NA keep the "Weekend" default.
r$weekday <- "Weekend"
r$weekday[which(wday(r$start_date) > 1 & wday(r$start_date) < 7)] <- "Workday"
## Workday vs. weekend speed
## (select by the text labels assigned above; comparing against 1/0 matched
## no rows and summarised empty vectors)
summary(r$mph[which(r$weekday == "Workday")])
summary(r$mph[which(r$weekday == "Weekend")])
## Workday vs. weekend distance
summary(r$miles[which(r$weekday == "Workday")])
summary(r$miles[which(r$weekday == "Weekend")])
## Plot of speed by hour of day, with workday vs weekend trips colored
r$weekday <- factor(r$weekday)
pdf(file = "Plot_Time_Speed_Weekday.pdf", width = 11, height = 8.5)
## Columns are referenced by bare name inside aes() so ggplot2 resolves them
## from `r` itself. The plot is print()ed explicitly because ggplot objects
## are only drawn when printed, and source() does not auto-print them, which
## would leave the PDF empty.
print(
  ggplot(r, aes(x = hour(start_date), y = mph)) +
    geom_point(aes(colour = weekday), position = "jitter") +
    xlab("Hour of the Day") +
    ylab("MPH") +
    ggtitle("Trip Speed by Time of Day")
)
dev.off()
## Plot distribution of trip distances (0.1-mile bins)
## print() is explicit so the plot is also drawn when the script is source()d,
## where top-level ggplot objects are not auto-printed.
print(
  ggplot(r, aes(x = miles)) +
    geom_histogram(binwidth = 0.1, fill = "#0000CC") +
    xlab("Miles") +
    ylab("Number of Trips") +
    ggtitle("Distribution of Trip Lengths")
)
## Plot distribution of trip times (1-minute bins)
print(
  ggplot(r, aes(x = actual_mins)) +
    geom_histogram(binwidth = 1)
)
## Plot distribution of trip speeds (1-mph bins), saved to a PDF
pdf(file = "Distribution_Trip_Speeds.pdf", width = 11, height = 8.5)
## print() is explicit: ggplot objects are only drawn when printed, and
## source() does not auto-print them, which would leave the PDF empty.
print(
  ggplot(r, aes(x = mph)) +
    geom_histogram(binwidth = 1, fill = "#0000CC") +
    xlab("MPH") +
    ylab("Number of Trips") +
    ggtitle("Distribution of Trip Speeds")
)
dev.off()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
# You can’t perform that action at this time.