Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
## Code for subsetting and formatting open paths data for Mapbox
## Miles Grimshaw
## December 26th 2013
## Set working directory and load required packages
## getwd() only reports the current working directory; its value is not
## stored or used afterwards.
getwd()
## NOTE: hard-coded, machine-specific path. The relative paths used later
## ("./openpaths_milesgrimshaw.csv" for input, "paths_final.csv" for output)
## are resolved against this directory.
setwd("~/Dropbox/Personal/Mapbox/OpenPaths/")
## NOTE(review): no stringr function is called in the visible script --
## confirm whether this package is still needed.
library(stringr)
## lubridate supplies the date-part helpers (e.g. month(), mday()) used below
library(lubridate)
# library(ggplot2)
## Load the raw OpenPaths export.
## as.is = TRUE leaves text columns as character instead of factors.
d <- read.csv(
  file = "./openpaths_milesgrimshaw.csv",
  header = TRUE,
  as.is = TRUE
)
## Preview the first rows
head(d)
## Parse the text time stamps in $date into POSIXct values (New York time)
d$t <- as.POSIXct(
  d$date,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "America/New_York"
)
## Keep columns 1-3 plus the new column 8 (per the original note: lat, lon,
## alt, t). Selection is positional, so it relies on the export's column order.
d2 <- d[c(1, 2, 3, 8)]
head(d2)
## Drop exact duplicate rows (first occurrence is kept)
d3 <- d2[!duplicated(d2), ]
## Round both coordinates to 5 significant figures
d3$lon <- signif(d3$lon, digits = 5)
d3$lat <- signif(d3$lat, digits = 5)
## Drop the third column ('alt'), keeping columns 1, 2 and 4
d3 <- d3[c(1, 2, 4)]
## Only want data for 2013.
## Use a half-open interval [2013-01-01, 2014-01-01) in New York time: with
## only a lower bound, any fixes from 2014 onwards would also be kept, and
## because only month/day (not year) are derived later they would be mixed in
## with the 2013 points.
year_2013 <- as.POSIXct("2013-01-01", format = "%Y-%m-%d",
                        tz = "America/New_York")
year_2014 <- as.POSIXct("2014-01-01", format = "%Y-%m-%d",
                        tz = "America/New_York")
## which() drops rows whose time stamp is NA rather than returning NA rows
d4 <- d3[which(d3$t >= year_2013 & d3$t < year_2014), ]
## Further trim the number of points: keep the first fix, plus every fix
## recorded more than 10 minutes after the fix immediately before it in d4.
## The gaps are computed in one vectorised step instead of growing d5 with
## rbind() inside a loop (which copies the data frame on every append), and
## the guard avoids `2:nrow(d4)` counting backwards when d4 has 0 or 1 rows.
n_fixes <- nrow(d4)
if (n_fixes > 1) {
  ## gap_mins[k] is the time from row k to row k + 1, in minutes
  gap_mins <- as.numeric(
    difftime(d4$t[-1], d4$t[-n_fixes], units = "mins")
  )
  ## Row 1 is always kept; row k + 1 is kept when its gap exceeds 10 minutes
  d5 <- d4[c(TRUE, gap_mins > 10), ]
} else {
  ## Zero or one fix: there is no previous point to compare against
  d5 <- d4
}
## Create a second pair of columns to enable creation of line segments
## This is not necessary, as the line segments are created in the Ruby script
# for (i in 1:(nrow(d5)-1)) {
# d5$lat2[i] <- d5$lat[i+1]
# d5$lon2[i] <- d5$lon[i+1]
# }
## Give the three remaining columns descriptive names
names(d5) <- c("latitude", "longitude", "time")
## Create separate columns for the day of the week, month, and day of the
## month from the time stamp
head(d5)
## Day of the week.
## as.POSIXlt()$wday counts 0 = Sunday ... 6 = Saturday, so adding 1 indexes a
## Sunday-first vector of names. (lubridate's wday() is also Sunday-first by
## default, so labelling its value 1 as 'Monday' shifts every name by a day.)
## Assigning the whole column at once also avoids filling a not-yet-existing
## column by index, which only yields a full-length column when the last row
## happens to be among the matched rows.
day_names <- c(
  "Sunday", "Monday", "Tuesday", "Wednesday",
  "Thursday", "Friday", "Saturday"
)
d5$week_day <- day_names[as.POSIXlt(d5$time)$wday + 1L]
## Month name for each fix.
## month() returns 1-12, which indexes R's built-in month.name vector
## ("January" ... "December"). A single whole-column assignment replaces
## twelve by-index assignments into a column that does not exist yet; those
## build a vector only as long as the largest matched row index, so they can
## error or be recycled when a month's rows do not reach the last row.
## Rows with an NA time get NA, as before.
d5$month <- month.name[month(d5$time)]
## Day of the month, taken from the time stamp
d5[["day"]] <- mday(d5$time)
## Inspect both ends of the result to check the format
head(d5)
tail(d5)
## Drop the time column (column 3); columns 1, 2, 4, 5 and 6 are expected to
## be latitude, longitude, week_day, month and day
d6 <- d5[c(1, 2, 4, 5, 6)]
head(d6)
## Export the result; write.csv() also writes the row names by default
write.csv(x = d6, file = "paths_final.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment