Skip to content

Instantly share code, notes, and snippets.

@ceaksan
Created April 9, 2021 10:40
Show Gist options
  • Save ceaksan/d0b88f0266d79429af00b18360af8938 to your computer and use it in GitHub Desktop.
Save ceaksan/d0b88f0266d79429af00b18360af8938 to your computer and use it in GitHub Desktop.
library(RSelenium)
library(readxl)
# Load the pages to check: the addresses are taken from the `Page` column of
# the first worksheet of the workbook.
PageURL <- read_xlsx(path = "<file-name>.xlsx", sheet = 1)
checkList <- PageURL$Page

# Start a Selenium server together with a Firefox session on port 4567.
driver <- rsDriver(
  port = 4567L,
  browser = "firefox",
  version = "latest",
  verbose = TRUE,
  check = TRUE
)

# The client handed back by rsDriver() is already connected to a browser
# session, so open() is deliberately not called again: a second open() would
# start another browser window and leave the first session orphaned.
remote_driver <- driver[["client"]]
#' Visit a page and collect the Facebook Pixel IDs registered on it.
#'
#' Uses the `remote_driver` RSelenium client from the enclosing environment,
#' which must already be connected to a browser session.
#'
#' @param url A single page address, with or without a scheme. Surrounding
#'   whitespace is dropped and addresses that do not start with `http://` or
#'   `https://` are prefixed with `https://`.
#' @return Whatever `remote_driver$executeScript()` returns for the injected
#'   script, whose result object carries `pageURL` (`window.location.href`
#'   after navigation) and `fbqID` (the keys of `_fbq.instance.pixelsByID`, or
#'   `false` when no initialised pixel is present).
getFBIDs <- function(url) {
  url <- trimws(url)
  # Base-R check anchored at the start of the string: stringr is never loaded
  # by this script, and an "http://" address must not be double-prefixed.
  if (!grepl("^https?://", url, ignore.case = TRUE)) {
    url <- paste0("https://", url)
  }
  remote_driver$navigate(url)
  # `fbq` can exist as a stub before the pixel library has populated
  # `_fbq.instance`, so every level is checked before it is dereferenced;
  # otherwise the script would throw and abort the whole crawl.
  remote_driver$executeScript("
    var pageURL = window.location.href;
    var hasPixels = (typeof fbq === 'function') &&
      (typeof _fbq !== 'undefined') &&
      !!_fbq.instance &&
      !!_fbq.instance.pixelsByID;
    var fbqID = hasPixels
      ? Object.keys(_fbq.instance.pixelsByID)
      : false;
    return { pageURL, fbqID }
  ")
}
# Crawl every address in `checkList` and row-bind the per-page results into a
# single data frame. Optional debugging helpers are kept commented out:
# head(checkList)
# remote_driver$checkError()
# remote_driver$checkStatus
# remote_driver$getAllCookies
crawledData <- tryCatch(
  {
    Pixels <- lapply(checkList, getFBIDs)
    as.data.frame(do.call(rbind, Pixels))
  },
  finally = {
    # Always release the browser and the Selenium process, even when a page
    # fails mid-crawl. The session is closed first, while the server is still
    # running to receive the request; the server is then stopped through the
    # handle returned by rsDriver().
    remote_driver$close()
    driver[["server"]]$stop()
  }
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment