Instantly share code, notes, and snippets.

Embed
What would you like to do?
Scrape NYC BikeShare Data
# library() errors immediately if the package is missing;
# require() only warns and returns FALSE, which would let the
# script fail later with a confusing "could not find fromJSON".
library(rjson)

# Generate the URLs to scrape.
# The lowest and highest unique station ids (11992-12404) were
# determined by inspecting the page that listed just the coordinates.
root.url <- "http://a841-tfpweb.nyc.gov/bikeshare/get_point_info?point="
id <- seq(11992, 12404, by = 1)
urls <- paste0(root.url, id)

# Pre-allocate the results data frame: one row per station URL,
# five columns matching the fields extracted in the loop below.
n <- length(urls)
scraped <- data.frame(matrix(0, nrow = n, ncol = 5))
names(scraped) <- c("id", "lat", "lng", "docks", "reason")
for (i in seq_len(n)) {
  # Fetch the station page. The original passed try()'s result straight
  # into fromJSON(), so a failed download produced a try-error object and
  # a cryptic parse failure; instead catch the error, record NAs, and skip.
  raw <- tryCatch(
    readLines(urls[i], warn = FALSE, ok = TRUE),
    error = function(e) NULL
  )
  if (is.null(raw)) {
    scraped[i, ] <- NA
    next
  }
  # Collapse multi-line responses into one string before parsing;
  # rjson::fromJSON expects a single JSON string.
  data <- fromJSON(paste(raw, collapse = ""))

  # Elements 1-3 are id, lat, lng. [[ extracts the scalar itself, and
  # as.numeric() keeps a bare number (not a list) going into the data frame.
  scraped[i, 1] <- as.numeric(data[[1]])
  scraped[i, 2] <- as.numeric(data[[2]])
  scraped[i, 3] <- as.numeric(data[[3]])

  # The dock count is embedded in free text (element 7); strip letters,
  # spaces, and periods so only the digits remain before conversion.
  dock.temp <- as.character(data[[7]])
  dock.temp <- gsub("[A-Za-z]+", "", dock.temp)
  dock.temp <- gsub(" ", "", dock.temp)
  dock.temp <- gsub("\\.", "", dock.temp)
  scraped[i, 4] <- as.numeric(dock.temp)

  # "reason" (element 9) may be absent or NULL for some stations;
  # guard so [[ does not error on a missing element.
  scraped[i, 5] <- if (length(data) >= 9 && !is.null(data[[9]])) {
    as.character(data[[9]])
  } else {
    NA_character_
  }
}
# Persist the scraped stations to disk in the working directory.
# Spell out FALSE: the shorthand F is an ordinary variable that can be
# reassigned, silently flipping this option.
write.csv(scraped, "scraped.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment