Skip to content

Instantly share code, notes, and snippets.

@mlane3
Last active December 28, 2022 21:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mlane3/4731dde506f4a6e0e21441e013f358d2 to your computer and use it in GitHub Desktop.
Save mlane3/4731dde506f4a6e0e21441e013f358d2 to your computer and use it in GitHub Desktop.
Selenium Example for Rstudio Cloud
# *----------------------------------------------------------------------------------
# | @PROGRAM NAME:
# | @DATE: 05/21/2020
# | @CREATED BY: Abhi Patel
# *----------------------------------------------------------------------------------
# | @PURPOSE: This script uses Selenium to log into the EReadKids
# | website used by the County Library to track usage of various
# | fields and help fund book purchases
# *----------------------------------------------------------------------------------
# | @UPDATES: Many parts of this script that do the webscraping have been omitted.
# | Some lines labeled "OMITTED" contained sensitive file names with configuration information.
# | * In general, for the download folder path ("download.default_directory"), it is best that
# | the folder path refer to a \\ or \\\\ folder instead of /, as we found that / was not
# | reliable for external or shared hard drives.
# | "\\\\ComputerName\Users\User.Name\Folder" was more reliable to use than "C:/User/User.Name/Folder/".
# | * Also uses an encrypted, MFA-protected config file to save the password information.
# | Google keyrings and R are used to set this up.
# | * The url we use for EReadKids is also masked and defaults to the admin homepage.
# *----------------------------------------------------------------------------------
library(RSelenium)
# library(rfulco)
library(RODBC)
library(data.table)
library(tidyverse)
library(lubridate)
library(Distance)
library(ps)
#
# # defining empty df for monthly checkout data
# monthly_checkouts_df <- data.frame(
# Month=character(),
# Checkouts=integer(),
# stringsAsFactors = FALSE
# )
#
# # defining empty df for monthly unique users data
# monthly_unique_users_df <- data.frame(
# Month=character(),
# Users=integer(),
# stringsAsFactors = FALSE
# )
#
# # connecting to sql server
# dbconn <- sql_connect('spmo','SPMO')
#
# # getting start date from sql server
# last_month_entry <- RODBC::sqlQuery(dbconn, 'SELECT Top 1 N.Month FROM [SPMO].[dbo].[Library_eReadKidsMonthly] N ORDER BY N.Month DESC;')
#
# # getting vector of start months needed based on latest month and current month
# needed_months <- c()
# start_date <- as.Date(last_month_entry[[1]], format = '%Y-%m-%d') %m+% months(1)
#
# hasMore = TRUE
#
# end_date <- floor_date(today(), unit = 'months')
#
# while (hasMore) {
# if (start_date < end_date){
# needed_months <- append(needed_months, as.character(start_date))
# start_date <- start_date %m+% months(1)
# }
# else{
# hasMore = FALSE
# }
# }
#
# if(is.null(needed_months)){
# close(dbconn)
# quit(save = "no")
# }
#
# # make sure there are no leftover source files
# if (!identical(list.files(getwd(), pattern="*.csv"),character(0))) {
# rfulco::archive_file(source_path=getwd(), pattern='*.csv', clean_archive=TRUE, clean_cutoff=14)
# }
#--------------------------------------------------------
# Define some functions
#--------------------------------------------------------
# This seems to fail on RStudio Cloud (Ubuntu) because netstat is no
# longer available.
#' Find the first candidate TCP port that `netstat` does not list as in use.
#'
#' @param test_ports Candidate port numbers, tried in the order given
#'   (default: 4400 to 4450).
#' @return A single integer: the first element of `test_ports` that does not
#'   appear as a local port in the `netstat -aon -p tcp` output.
#' @details Shells out to `netstat`, so the executable must be on the PATH.
#'   Signals an error if the command output cannot be read, or if every
#'   candidate port is already taken (previously this returned `NA`).
get_free_port <- function(test_ports = seq(4400, 4450, 1)) {
  # `cmd =` states explicitly that the string is a shell command instead of
  # relying on fread() inferring that from the spaces it contains.
  used_ports_df <- tryCatch(
    data.table::fread(cmd = "netstat -aon -p tcp", skip = 1, header = FALSE),
    error = function(e) {
      stop(
        "Could not read used ports from netstat: ", conditionMessage(e),
        call. = FALSE
      )
    }
  )

  # Column V2 holds the local address; keep only what follows the last ":".
  used_ports <- as.numeric(sub(".*:", "", used_ports_df$V2))
  free_ports <- setdiff(test_ports, used_ports)

  if (length(free_ports) == 0L) {
    stop(
      "No free port found among the ", length(test_ports),
      " candidate port(s)",
      call. = FALSE
    )
  }
  as.integer(free_ports[1])
}
# Tear down a Selenium session created by rsDriver(): close the browser, kill
# the processes spawned by the Selenium server, then stop the server itself.
#
# driver: the object returned by rsDriver() (uses its `client` and `server`).
#
# Every step is attempted independently. A failing step (for example quitting
# a browser that never opened, or killing a process that has already exited)
# is reported with message() and the remaining steps still run, so a partial
# failure does not leave the other processes behind.
#
# Returns the value of gc(), as before.
clean_up <- function(driver) {
  # Evaluate one teardown step; on error, report it and carry on with NULL.
  attempt <- function(step, expr) {
    tryCatch(
      expr,
      error = function(e) {
        message("clean_up: ", step, " failed: ", conditionMessage(e))
        NULL
      }
    )
  }

  # close browser (terminates chrome.exe and chromedriver.exe)
  attempt("closing the browser", driver$client$quit())

  # ensures Windows processes spawned by selenium server are terminated
  # (java.exe and conhost.exe)
  child_proc <- attempt(
    "listing the server's child processes",
    ps_children(driver$server$process$as_ps_handle())
  )
  for (p in child_proc) {
    attempt("killing a child process", ps_kill(p))
  }

  # stop Selenium server (terminates cmd.exe)
  attempt("stopping the Selenium server", driver$server$stop())
  attempt("killing the server process tree", driver$server$process$kill_tree())

  # Drops only this function's reference; the caller's variable is untouched.
  rm(driver)
  gc()
}
# Write the PID of the Selenium server process, followed by the PIDs of its
# child processes, to `file_path`, one PID per line. An existing file is
# overwritten.
#
# dvr:       the object returned by rsDriver() (uses `dvr$server$process`).
# file_path: path of the text file to write.
#
# Returns NULL invisibly.
print_process_info <- function(dvr, file_path) {
  server_handle <- dvr$server$process$as_ps_handle()
  handles <- c(list(server_handle), ps_children(server_handle))
  pids <- vapply(
    handles,
    function(p) as.character(ps_pid(p)),
    character(1)
  )
  # writeLines() terminates every element with a newline, so the file always
  # holds exactly one PID per line.
  writeLines(pids, con = file_path)
  invisible(NULL)
}
# Extra capabilities passed to rsDriver(): Chrome preferences plus the
# command-line arguments used to launch the browser.
chrome_ops <- local({
  browser_prefs <- list(
    "profile.default_content_settings.popups" = 0L,
    "download.prompt_for_download" = FALSE
  )
  launch_args <- list("--headless")

  list(
    chromeOptions = list(
      prefs = browser_prefs,
      args = launch_args
    )
  )
})
# OMITTED
# chrome_ops <- list(
# chromeOptions =
# list(prefs = list(
# "profile.default_content_settings.popups" = 0L,
# "download.prompt_for_download" = FALSE,
# "download.default_directory" = "OMITTED"
# )
# )
# )
# config <- config::get(
# config = "chrome",
# file = "OMITTED"
# )
# Chrome driver version requested from rsDriver().
configdrive <- "108.0.5359.71"
# port <- rfulco::get_free_port()
# port <- get_free_port() # Fails in RStudio Cloud
port <- 4400L

# Start the Selenium server and browser. The call is wrapped in try() so that
# a startup failure yields a "try-error" object that the check below can see,
# instead of aborting the script before the check is reached.
driver <- try(
  rsDriver(
    chromever = configdrive,
    browser = "chrome",
    extraCapabilities = chrome_ops,
    port = port
  ),
  silent = TRUE
)
# Try firefox
# driver <- rsDriver(port = 4445L,browser = "firefox")

if (inherits(driver, "try-error")) { # stop script if rsDriver fails
  message(geterrmessage())
  message("Execution halted")
} else {
  remDr <- driver$client
  # Probe the session; an error here means the browser did not open.
  current_url <- try(remDr$getCurrentUrl(), silent = TRUE)

  if (inherits(current_url, "try-error")) { # stop script if browser did not open
    message(geterrmessage())
    clean_up(driver)
    message("Execution halted")
  } else {
    # `finally` guarantees the browser and server are torn down even when one
    # of the scraping steps raises an error.
    tryCatch(
      {
        # print_process_info(driver, paste(getwd(),"../Scripts/pids.txt", sep="/"))
        remDr$navigate("https://admin.axis360.org/")
        # OMITTED Sensitive steps
        # ...
        # ...
        # ...
        # ...
        Sys.sleep(5)
      },
      finally = clean_up(driver)
    )
  }
}
@mlane3
Copy link
Author

mlane3 commented Dec 28, 2022

So I wanted to throw up a quick complete example of a script for R Selenium that works in Rstudio Workbench

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment