Instantly share code, notes, and snippets.

Embed
What would you like to do?
Scrape NYC BikeShare Data
# library() errors immediately if the package is missing;
# require() only warns and returns FALSE, which would let the
# script fail later with a confusing "could not find fromJSON".
library(rjson)

# Generate the URLs to scrape.
# The lowest and highest unique station ids (11992-12404) were
# determined by inspecting the page that listed just the coordinates.
root.url <- "http://a841-tfpweb.nyc.gov/bikeshare/get_point_info?point="
id <- seq(11992, 12404, by = 1)
urls <- paste0(root.url, id)

# Pre-allocate the results data frame: one row per station URL,
# five columns matching the fields extracted in the loop below.
n <- length(urls)
scraped <- data.frame(matrix(0, nrow = n, ncol = 5))
names(scraped) <- c("id", "lat", "lng", "docks", "reason")
for (i in seq_len(n)) {
  # Fetch the station page. The original passed try()'s result straight
  # into fromJSON(), so a failed download produced a try-error object and
  # a cryptic parse failure; instead catch the error, record NAs, and skip.
  raw <- tryCatch(
    readLines(urls[i], warn = FALSE, ok = TRUE),
    error = function(e) NULL
  )
  if (is.null(raw)) {
    scraped[i, ] <- NA
    next
  }
  # Collapse multi-line responses into one string before parsing;
  # rjson::fromJSON expects a single JSON string.
  data <- fromJSON(paste(raw, collapse = ""))

  # Elements 1-3 are id, lat, lng. [[ extracts the scalar itself, and
  # as.numeric() keeps a bare number (not a list) going into the data frame.
  scraped[i, 1] <- as.numeric(data[[1]])
  scraped[i, 2] <- as.numeric(data[[2]])
  scraped[i, 3] <- as.numeric(data[[3]])

  # The dock count is embedded in free text (element 7); strip letters,
  # spaces, and periods so only the digits remain before conversion.
  dock.temp <- as.character(data[[7]])
  dock.temp <- gsub("[A-Za-z]+", "", dock.temp)
  dock.temp <- gsub(" ", "", dock.temp)
  dock.temp <- gsub("\\.", "", dock.temp)
  scraped[i, 4] <- as.numeric(dock.temp)

  # "reason" (element 9) may be absent or NULL for some stations;
  # guard so [[ does not error on a missing element.
  scraped[i, 5] <- if (length(data) >= 9 && !is.null(data[[9]])) {
    as.character(data[[9]])
  } else {
    NA_character_
  }
}
# Persist the scraped stations to disk in the working directory.
# Spell out FALSE: the shorthand F is an ordinary variable that can be
# reassigned, silently flipping this option.
write.csv(scraped, "scraped.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment