Skip to content

Instantly share code, notes, and snippets.

@amitkaps
Last active August 29, 2015 14:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amitkaps/8cc46e481d8befa0bdda to your computer and use it in GitHub Desktop.
Save amitkaps/8cc46e481d8befa0bdda to your computer and use it in GitHub Desktop.
## Scraper One
# Download the page at `url` and parse every <table> element found on it.
library(rvest)
url <- "http://nuforc.org/webreports/ndxe201507.html"
# read_html() is the replacement for html(), which rvest deprecated in 0.3.0
# and later removed; calling html() fails on current rvest releases.
pg <- read_html(url)
nodes <- html_nodes(pg, "table")
# `nodes` is a node set, so the result holds one parsed table per matched node.
table <- html_table(nodes)
str(table)
# View() opens an interactive data viewer; only call it when a user is present.
if (interactive()) {
  View(table)
}
# Alternate pipe function
# table <- pg %>% html_nodes("table") %>% html_table()
## Scraper Two
# Select anchors with a CSS selector, then pull out their text and href values.
url2 <- "http://carzoom.in/car-specification/"
# read_html() is the replacement for html(), which rvest deprecated in 0.3.0
# and later removed; calling html() fails on current rvest releases.
pg2 <- read_html(url2)
# Anchors nested inside list items under elements with class "car-model".
nodes2 <- html_nodes(pg2, ".car-model li a")
title2 <- html_text(nodes2)
links2 <- html_attr(nodes2, "href")
links2
## Scraper Three
# Open a session on the report page, fill in its first form, submit it, and
# parse the resulting grid into a data frame.
url3 <- "http://nhrdf.org/en-us/AreaAndProductiionReport"
pg3.session <- html_session(url3)
# The first form on the page is the one that gets filled in and submitted.
pg3.form <- html_form(pg3.session)[[1]]
# Field names contain `$`, so they must be quoted to be used as argument names.
pg3.form.filled <- set_values(
  pg3.form,
  "dnn$dnnLANG$selectCulture" = "en-US",
  "dnn$ctr961$AreaAndProductiionReport$Year" = 2014,
  "dnn$ctr961$AreaAndProductiionReport$State" = "All",
  "dnn$ctr961$AreaAndProductiionReport$ddlCrop" = 1
)
# `submit` names the specific button used to send the form.
pg3.submit <- submit_form(
  pg3.session,
  pg3.form.filled,
  submit = "dnn$ctr961$AreaAndProductiionReport$BtnGetMnthWiseData"
)
# read_html() is the replacement for html(), which rvest deprecated in 0.3.0
# and later removed; calling html() fails on current rvest releases.
# NOTE(review): html_session(), set_values() and submit_form() are kept for
# compatibility with older rvest; newer releases may emit deprecation
# warnings for them - confirm against the installed version.
pg3.out <- read_html(pg3.submit)
pg3.table <- pg3.out %>%
  html_node("#dnn_ctr961_AreaAndProductiionReport_GridView1") %>%
  html_table()
str(pg3.table)
# Parameterised scraping: build the request URL from a base plus a year value.
years <- 2014
url4 <- "http://nhrdf.org/en-us/AreaAndProductiionReport?year="
# paste0() joins with no separator, appending the year directly to the base.
urlGen <- paste0(url4, years)
urlGen
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment