Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save javieroot/407f1b3d1d80c574b597a84a99b71bda to your computer and use it in GitHub Desktop.
Save javieroot/407f1b3d1d80c574b597a84a99b71bda to your computer and use it in GitHub Desktop.
Web Scraping - Get 2 nodes at the same time
library(RSelenium)
library(rvest)
# Start a Selenium server together with a browser session.
# checkForServer() and startServer() are defunct in current RSelenium releases
# and raise an error when called; rsDriver() is the documented replacement.
# rsDriver() launches the server and returns an already-opened client, so no
# separate remDr$open() call is required.
# browser = "firefox" keeps the browser that remoteDriver() selected by default
# (rsDriver() would otherwise default to Chrome).
selenium_session <- rsDriver(browser = "firefox")
remDr <- selenium_session$client
# Navigate to the category page that will be scraped.
remDr$navigate("http://www.linio.com.pe/celulares-telefonia-y-gps/")
# Scroll down in four steps, pausing after each one so the page can load the
# next batch of products. Four scrolls are needed before a product that shows
# only the current price (and no old price) becomes visible.
scroll_offsets <- seq_len(4) * 10000
for (offset in scroll_offsets) {
  scroll_script <- paste("scroll(0,", offset, ");")
  remDr$executeScript(scroll_script)
  # Give the lazily loaded content time to render before scrolling again.
  Sys.sleep(3)
}
# Capture the rendered HTML of the page. getPageSource() returns a list; the
# markup itself is read from its first element by the parsing code below.
page_source <- remDr$getPageSource()
# Everything after this point works on the captured HTML only, so close the
# browser session here instead of leaving it open after the script finishes.
remDr$close()
# Parse the captured HTML once and reuse the document for every selector.
# read_html() replaces html(), which has been deprecated since rvest 0.3.0.
parsed_page <- read_html(page_source[[1]])

# Return the text of every node in `doc` matching the CSS selector `css`,
# with leading and trailing whitespace removed.
extract_trimmed_text <- function(doc, css) {
  node_text <- doc %>%
    html_nodes(css) %>%
    html_text()
  gsub("^\\s+|\\s+$", "", node_text)
}

####### Product ###################################
# NOTE(review): selects every <em> element on the page; presumably these hold
# the product names -- confirm against the page markup.
Celulares_Telefonia_Producto <- extract_trimmed_text(parsed_page, "em")

###### Current price ###############################
Celulares_Telefonia_Precio_actual <- extract_trimmed_text(
  parsed_page,
  ".product-itm-price-new"
)

###### Old price ###################################
Celulares_Telefonia_Precio_antiguo <- extract_trimmed_text(
  parsed_page,
  ".product-itm-price-old"
)
#######################################################
# Compare how many current prices and how many old prices were extracted.
length(Celulares_Telefonia_Precio_actual)
length(Celulares_Telefonia_Precio_antiguo)
# The lengths are different because not all products have an old price, so
# the two vectors cannot be matched up element by element.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment