Skip to content

Instantly share code, notes, and snippets.

@alex23lemm
Last active August 29, 2015 14:08
Show Gist options
  • Save alex23lemm/82df2d8cfe80be9f50ef to your computer and use it in GitHub Desktop.
Save alex23lemm/82df2d8cfe80be9f50ef to your computer and use it in GitHub Desktop.
RSelenium: Navigating using PhantomJS directly
# Load libraries and config file; define variables ----------------------------
library(RSelenium)
library(yaml)
library(dplyr)
library(httr)
# Settings are read from a local YAML file; the rest of the script uses the
# entries under `openair` (company, user, password, proxy)
config <- yaml.load_file("config.yml")
# ID of the report that should be downloaded
report_id <- 29191
# Root URL that the login page and the report links are appended to
base_url <- "https://www.openair.com/"
# Start phantomjs in webdriver mode -------------------------------------------
# Command line switches handed to PhantomJS via `extras`
phantom_args <- c("--ignore-ssl-errors=yes", "--ssl-protocol=tlsv1")
pJS <- phantom(extras = phantom_args)
# Fixed pause before connecting (presumably to let PhantomJS finish starting)
Sys.sleep(5)
# Create the client for the phantomjs browser and open a session
remDrv <- remoteDriver(browserName = "phantomjs")
remDrv$open()
# Enter site and navigate to reports section -----------------------------------
# Type `text` into the element whose name attribute equals `field_name`
fill_field <- function(field_name, text) {
  field <- remDrv$findElement(using = "name", field_name)
  field$sendKeysToElement(list(text))
}
# Send the enter key to the link whose text is exactly `link_text`.
# (The 'link text' locator strategy is the alternative to this xpath lookup;
# to read a link's target instead, use //a[text()='...']/@href.)
press_enter_on_link <- function(link_text) {
  link <- remDrv$findElement(
    using = "xpath",
    paste0("//a[text()='", link_text, "']")
  )
  link$sendKeysToElement(list(key = "enter"))
}

remDrv$navigate(paste0(base_url, "index.pl"))
# Fill out login form and enter site
fill_field("account_nickname", config$openair$company)
fill_field("user_nickname", config$openair$user)
fill_field("password", config$openair$password)
remDrv$findElement(using = "css selector", ".loginFormBtn")$clickElement()
# Open the user menu and follow the "Log in as" link to the proxy user page
remDrv$findElement(using = "css selector", ".nav_user")$clickElement()
proxy_anchor <- remDrv$findElement(
  using = "xpath",
  "//a[contains(text(), 'Log in as')]"
)
proxy_page <- proxy_anchor$getElementAttribute("href")[[1]]
remDrv$navigate(proxy_page)
# Continue browsing with proxy user
press_enter_on_link(config$openair$proxy)
# Move the driver to the second window handle (presumably the window opened
# for the proxy session) and pause for a fixed five seconds
remDrv$switchToWindow(remDrv$getWindowHandles()[[1]][2])
Sys.sleep(5)
# Navigate to reports section
press_enter_on_link("Reports")
press_enter_on_link("Saved reports")
# Identify report of choice ----------------------------------------------------
# Collect the href of every link titled "Download" on the current page.
# The XML functions are namespace-qualified because this script never attaches
# the XML package itself.
report_links <- remDrv$getPageSource()[[1]] %>%
  XML::htmlParse() %>%
  XML::xmlRoot() %>%
  XML::xpathSApply('//a[@title="Download"]/@href')
# xpathSApply() yields an empty list when nothing matches; normalise to a
# plain character vector so the matching below behaves the same either way
report_links <- as.character(unlist(report_links))
# Match the ID literally; format() keeps large numeric IDs out of scientific
# notation (e.g. 100000 would otherwise be searched for as "1e+05")
report_pattern <- format(report_id, scientific = FALSE, trim = TRUE)
index <- grep(report_pattern, report_links, fixed = TRUE)
# Exactly one link must match, otherwise the navigation target is ambiguous
if (length(index) != 1L) {
  stop(
    "Expected exactly one download link for report ", report_pattern,
    " but found ", length(index), ".",
    call. = FALSE
  )
}
remDrv$navigate(paste0(base_url, report_links[index]))
# Download report --------------------------------------------------------------
# The file itself is linked from a "Click here" anchor on the current page
# (the 'link text' locator strategy is an alternative to this xpath lookup)
click_here_links <- remDrv$findElements(
  using = 'xpath',
  "//a[text()='Click here']"
)
if (length(click_here_links) == 0L) {
  stop("No 'Click here' download link found on the report page.",
       call. = FALSE)
}
download_link <- click_here_links[[1]]$getElementAttribute('href')[[1]]
# Hand the browser session's cookies to httr as a named character vector
# (name -> value). Name and value are pulled out per cookie, so cookies that
# carry different sets of optional fields do not break the conversion.
my_cookies <- remDrv$getAllCookies()
cookies <- vapply(
  my_cookies,
  function(cookie) as.character(cookie[["value"]]),
  character(1)
)
names(cookies) <- vapply(
  my_cookies,
  function(cookie) as.character(cookie[["name"]]),
  character(1)
)
# Fetch the report outside the browser and fail loudly on an HTTP error
# instead of writing an error page to disk
response <- GET(download_link, set_cookies(.cookies = cookies))
stop_for_status(response)
parsed_csv <- content(response, 'parsed')
write.csv(parsed_csv, 'phantom_report8.csv', row.names = FALSE)
# Clean up: close the browser session, then stop the PhantomJS process that
# was started via phantom() at the top of the script
remDrv$close()
pJS$stop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment