
Scrape NYC BikeShare Data

scrapeBikeShare.R
library(rjson)

# Generate the URLs to scrape.
# The lowest and highest unique station ids (11992 and 12404) were
# determined by inspecting the page that lists just the coordinates.

root.url <- "http://a841-tfpweb.nyc.gov/bikeshare/get_point_info?point="
ids <- seq(11992, 12404, by = 1)
urls <- paste(root.url, ids, sep = "")

# Create a shell data frame to hold one row per station.

n <- length(urls)
scraped <- data.frame(matrix(0, nrow = n, ncol = 5))
names(scraped) <- c("id", "lat", "lng", "docks", "reason")

for (i in 1:n) {

  # Read in the web page. try() keeps one bad request from killing
  # the whole loop; skip the row if the request failed. readLines()
  # can return several lines, so collapse them before parsing.
  raw <- try(readLines(urls[i], warn = FALSE, ok = TRUE), silent = TRUE)
  if (inherits(raw, "try-error")) next
  data <- fromJSON(paste(raw, collapse = ""))

  # Scrape id, lat, and lng; as.numeric() avoids the problem of
  # assigning a list element to a data frame cell.
  scraped[i, 1] <- as.numeric(data[[1]])
  scraped[i, 2] <- as.numeric(data[[2]])
  scraped[i, 3] <- as.numeric(data[[3]])

  # Scrape the number of docks: strip letters, spaces, and periods
  # from the text field, leaving only the digits.
  dock.temp <- as.character(data[[7]])
  dock.temp <- gsub("[A-Za-z]+", "", dock.temp)
  dock.temp <- gsub(" ", "", dock.temp)
  dock.temp <- gsub("\\.", "", dock.temp)
  scraped[i, 4] <- as.numeric(dock.temp)

  # Scrape the "reason" field.
  scraped[i, 5] <- as.character(data[[9]])

# end scraper
}

# Write the results to csv.
write.csv(scraped, "scraped.csv", row.names = FALSE)
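
As a quick sanity check after the scrape finishes (a minimal sketch, assuming the scraped.csv filename and column names from the script above), you can read the output back in and eyeball the coordinate and dock ranges:

# Read the scraped data back in and inspect it.
scraped <- read.csv("scraped.csv", stringsAsFactors = FALSE)
str(scraped)                                   # column types and sample values
summary(scraped[, c("lat", "lng", "docks")])   # value ranges
# Stations should all fall roughly within NYC:
# lat ~ 40.5 to 40.9, lng ~ -74.05 to -73.7.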
