## Code for subsetting and formatting OpenPaths data for Mapbox
## Miles Grimshaw
## December 26th 2013
## Set working directory and load required packages
## NOTE: every relative path in this script (the input CSV read below and the
## output CSV written at the end) is resolved against this directory, so it
## must exist on the machine running the script.
getwd()
setwd("~/Dropbox/Personal/Mapbox/OpenPaths/")
library(stringr)
library(lubridate)
# library(ggplot2)

## Read in the data
## as.is = TRUE keeps text columns (including $date) as character vectors
## instead of converting them to factors.
d <- read.csv("./openpaths_milesgrimshaw.csv", header = TRUE, as.is = TRUE)
head(d)
## Convert $date to a time variable
## Timestamps are parsed as "YYYY-MM-DD HH:MM:SS" and interpreted in the
## America/New_York time zone; values that do not match the format become NA.
d$t <- as.POSIXct(
  d$date,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "America/New_York"
)

## Just select lat, lon, alt, t
## The first three columns are still taken by position (presumably lat, lon,
## alt -- confirm against the CSV header). The parsed timestamp is selected by
## name, so the selection no longer depends on the CSV having exactly seven
## columns.
d2 <- d[, c(names(d)[1:3], "t")]
head(d2)

## Only select unique rows
d3 <- unique(d2)
## Trim down to 5 significant figures
d3$lat <- signif(d3$lat, 5)
d3$lon <- signif(d3$lon, 5)

## Further subset removing the 'alt' column
d3 <- d3[, c("lat", "lon", "t")]

## Only want data for 2013
## Keep timestamps in [2013-01-01, 2014-01-01), America/New_York time. The
## exclusive upper bound stops later records from leaking in when a newer
## export is processed. Rows whose timestamp is NA (unparseable dates) are
## dropped, as they were by the previous which()-based filter.
year_2013 <- as.POSIXct(
  "2013-01-01",
  format = "%Y-%m-%d",
  tz = "America/New_York"
)
year_2014 <- as.POSIXct(
  "2014-01-01",
  format = "%Y-%m-%d",
  tz = "America/New_York"
)
in_2013 <- !is.na(d3$t) & d3$t >= year_2013 & d3$t < year_2014
d4 <- d3[in_2013, ]
## Further trim the number of points to those > than 10 mins apart
## A row is kept when it was recorded more than 10 minutes after the row
## immediately before it in d4 (not after the last *kept* row); the first row
## is always kept. Rows are assumed to be in chronological order -- TODO
## confirm against the export.
##
## Vectorised so that it also works when d4 has 0 or 1 rows (2:nrow(d4) counts
## backwards in that case) and avoids growing d5 with rbind() inside a loop.
stopifnot(!anyNA(d4$t))

## POSIXct is seconds since the epoch, so consecutive differences / 60 give
## the gap in minutes between each row and its predecessor.
gap_mins <- diff(as.numeric(d4$t)) / 60

## seq_len() trims the leading TRUE away when d4 is empty.
keep <- c(TRUE, gap_mins > 10)[seq_len(nrow(d4))]
d5 <- d4[keep, ]
## Create a second column to enable creation of line segments
## This is not necessary because the Ruby script creates the line segments
# for (i in 1:(nrow(d5)-1)) {
# d5$lat2[i] <- d5$lat[i+1]
# d5$lon2[i] <- d5$lon[i+1]
# }
## Rename columns
colnames(d5) <- c("latitude", "longitude", "time")

## Create separate columns for the day of the week and the month from the
## time stamp
head(d5)

## Day of the week.
## With week_start = 7, lubridate's wday() returns 1 for Sunday through 7 for
## Saturday, so the lookup vector must start at Sunday (starting it at Monday
## labels every point one day late). The whole column is assigned in one step:
## assigning into a slice of a column that does not exist yet builds a vector
## shorter than nrow(d5), which errors or is silently recycled.
day_names <- c(
  "Sunday", "Monday", "Tuesday", "Wednesday",
  "Thursday", "Friday", "Saturday"
)
d5$week_day <- day_names[wday(d5$time, week_start = 7)]

## Month.
## month() returns 1-12; month.name is base R's vector of English month names
## ("January" ... "December").
d5$month <- month.name[month(d5$time)]
## Set the day
## mday() gives the day of the month
d5$day <- mday(d5$time)

## Check format
head(d5)
tail(d5)

## We don't need the time column
## Columns are picked by name so the output layout does not depend on the
## order in which the derived columns were added.
d6 <- d5[, c("latitude", "longitude", "week_day", "month", "day")]
head(d6)

## Write to CSV
## The file is written relative to the current working directory. Row names
## are included as an unnamed first column (the write.csv default).
## NOTE(review): confirm the downstream consumer expects that extra column.
write.csv(d6, file = "paths_final.csv")
## End of script