Last active
December 28, 2022 21:50
-
-
Save mlane3/4731dde506f4a6e0e21441e013f358d2 to your computer and use it in GitHub Desktop.
Selenium Example for Rstudio Cloud
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *---------------------------------------------------------------------------------- | |
# | @PROGRAM NAME: | |
# | @DATE: 05/21/2020 | |
# | @CREATED BY: Abhi Patel | |
# *---------------------------------------------------------------------------------- | |
# | @PURPOSE: This script uses Selenium to log into the EReadKids | |
# | website used by the County Library to track usage of various | |
# | fields and help fund book purchases | |
# *---------------------------------------------------------------------------------- | |
# | @UPDATES: Many parts of this script that do the web scraping have been omitted. | |
# | Some lines labeled "OMITTED" contained sensitive file names with configuration information. | |
# | * In general, for the download folder path ("download.default_directory"), it is best that | |
# | the folder path refer to a \\ or \\\\ folder instead of /, as we found that / was not | |
# | reliable for external or shared hard drives. | |
# | \\\\ComputerName\Users\User.Name\Folder was more reliable to use than "C:/User/User.Name/Folder/" | |
# | * Also uses an encrypted, MFA-protected config file to save the password information. | |
# | Google keyrings and R were used to set this up. | |
# | * The url we use for EReadKids is also masked and defaults to the admin homepage. | |
# *---------------------------------------------------------------------------------- | |
library(RSelenium) | |
# library(rfulco) | |
library(RODBC) | |
library(data.table) | |
library(tidyverse) | |
library(lubridate) | |
library(Distance) | |
library(ps) | |
# | |
# # defining empty df for monthly checkout data | |
# monthly_checkouts_df <- data.frame( | |
# Month=character(), | |
# Checkouts=integer(), | |
# stringsAsFactors = FALSE | |
# ) | |
# | |
# # defining empty df for monthly unique users data | |
# monthly_unique_users_df <- data.frame( | |
# Month=character(), | |
# Users=integer(), | |
# stringsAsFactors = FALSE | |
# ) | |
# | |
# # connecting to sql server | |
# dbconn <- sql_connect('spmo','SPMO') | |
# | |
# # getting start date from sql server | |
# last_month_entry <- RODBC::sqlQuery(dbconn, 'SELECT Top 1 N.Month FROM [SPMO].[dbo].[Library_eReadKidsMonthly] N ORDER BY N.Month DESC;') | |
# | |
# # getting vector of start months needed based on latest month and current month | |
# needed_months <- c() | |
# start_date <- as.Date(last_month_entry[[1]], format = '%Y-%m-%d') %m+% months(1) | |
# | |
# hasMore = TRUE | |
# | |
# end_date <- floor_date(today(), unit = 'months') | |
# | |
# while (hasMore) { | |
# if (start_date < end_date){ | |
# needed_months <- append(needed_months, as.character(start_date)) | |
# start_date <- start_date %m+% months(1) | |
# } | |
# else{ | |
# hasMore = FALSE | |
# } | |
# } | |
# | |
# if(is.null(needed_months)){ | |
# close(dbconn) | |
# quit(save = "no") | |
# } | |
# | |
# # make sure there are no leftover source files | |
# if (!identical(list.files(getwd(), pattern="*.csv"),character(0))) { | |
# rfulco::archive_file(source_path=getwd(), pattern='*.csv', clean_archive=TRUE, clean_cutoff=14) | |
# } | |
#-------------------------------------------------------- | |
# Define some functions | |
#-------------------------------------------------------- | |
# This seems to fail on the RStudio Cloud Ubuntu image because netstat is no
# longer available there.
# Return the first port in `test_ports` that does not appear as a local port
# in the output of `netstat -aon -p tcp`.
#
# The netstat flags used here are the Windows spelling; the call needs a
# `netstat` executable on the PATH.
#
# @param test_ports Numeric vector of candidate port numbers, tried in order.
# @return A single integer port number, or NA_integer_ (with a warning) when
#   every candidate port is already in use.
get_free_port <- function(test_ports = seq(4400, 4450, 1)) {
  # Pass the shell command through `cmd =` explicitly rather than relying on
  # fread() guessing that a string containing spaces is a command.
  used_ports_df <- data.table::fread(
    cmd = "netstat -aon -p tcp",
    skip = 1,
    header = FALSE
  )
  # Column V2 is read as the local address; keep only the text after the
  # last ':' and treat it as the port number.
  used_ports <- as.numeric(sub(".*:", "", used_ports_df$V2))
  free_ports <- setdiff(test_ports, used_ports)
  if (length(free_ports) == 0L) {
    warning("get_free_port: no free port found among the candidates",
            call. = FALSE)
    return(NA_integer_)
  }
  as.integer(free_ports[1])
}
# Tear down everything started for `driver`: the browser session, the child
# processes of the Selenium server, and the server itself.
#
# Every step is attempted independently. A failing step (for example
# quitting a browser that never opened) is reported as a warning and the
# remaining steps still run, so the server process is not left behind.
#
# @param driver Object with `$client` and `$server` members, as used in this
#   script for the result of rsDriver().
# @return The value of gc(), invisibly.
clean_up <- function(driver) {
  # Evaluate `step` (lazily, inside tryCatch) and downgrade any error to a
  # warning so that later teardown steps are not skipped.
  attempt <- function(label, step) {
    tryCatch(
      step,
      error = function(e) {
        warning("clean_up: ", label, " failed: ", conditionMessage(e),
                call. = FALSE)
        NULL
      }
    )
  }

  # close browser (terminates chrome.exe and chromedriver.exe)
  attempt("closing the browser", driver$client$quit())

  # ensures Windows processes spawned by selenium server are terminated
  # (java.exe and conhost.exe)
  child_proc <- attempt(
    "listing server child processes",
    ps_children(driver$server$process$as_ps_handle())
  )
  for (p in child_proc) {
    attempt("killing a server child process", ps_kill(p))
  }

  # stop Selenium server (terminates cmd.exe)
  attempt("stopping the Selenium server", driver$server$stop())
  attempt("killing the server process tree", driver$server$process$kill_tree())

  invisible(gc())
}
# Write the PID of the Selenium server process, followed by the PIDs of its
# child processes, to `file_path`, one PID per line. An existing file is
# overwritten.
#
# @param dvr Object whose `$server$process$as_ps_handle()` yields a process
#   handle accepted by ps_pid() and ps_children().
# @param file_path Path of the text file to write.
# @return NULL, invisibly.
print_process_info <- function(dvr, file_path) {
  server_handle <- dvr$server$process$as_ps_handle()
  handles <- c(list(server_handle), ps_children(server_handle))
  pids <- vapply(
    handles,
    function(h) as.character(ps_pid(h)),
    character(1)
  )
  # writeLines() terminates every entry with a newline. The previous
  # cat(..., sep = "\n") calls wrote no trailing newline, so each appended PID
  # ran straight into the one before it.
  writeLines(pids, con = file_path)
}
# Extra capabilities handed to rsDriver() for Chrome: two browser
# preferences (pop-up content setting and download prompt) plus the
# --headless command-line switch.
chrome_ops <- list(
  chromeOptions = c(
    list(
      prefs = list(
        "profile.default_content_settings.popups" = 0L,
        "download.prompt_for_download" = FALSE
      )
    ),
    list(args = list("--headless"))
  )
)
# OMITTED | |
# chrome_ops <- list( | |
# chromeOptions = | |
# list(prefs = list( | |
# "profile.default_content_settings.popups" = 0L, | |
# "download.prompt_for_download" = FALSE, | |
# "download.default_directory" = "OMITTED" | |
# ) | |
# ) | |
# ) | |
# config <- config::get( | |
# config = "chrome", | |
# file = "OMITTED" | |
# ) | |
# Chromedriver version requested from rsDriver().
configdrive <- "108.0.5359.71"
# port <- rfulco::get_free_port()
# port <- get_free_port() # Fails in RStudio Cloud
port <- 4400L

# Wrap the launch in try() so that a failure produces a "try-error" object
# the checks below can actually see, instead of aborting the script here.
driver <- try(
  rsDriver(
    chromever = configdrive,
    browser = "chrome",
    extraCapabilities = chrome_ops,
    port = port
  ),
  silent = TRUE
)
# Try firefox
# driver <- rsDriver(port = 4445L, browser = "firefox")

if (inherits(driver, "try-error")) { # stop script if rsDriver fails
  message(geterrmessage())
  message("Execution halted")
} else if (inherits(try(driver$client$getCurrentUrl(), silent = TRUE),
                    "try-error")) { # stop script if browser did not open
  message(geterrmessage())
  clean_up(driver)
  message("Execution halted")
} else {
  # Only touch driver$client once the launch is known to have succeeded.
  remDr <- driver$client
  # `finally` guarantees the browser and server are torn down even when one
  # of the scraping steps raises an error.
  tryCatch(
    {
      # print_process_info(driver, paste(getwd(), "../Scripts/pids.txt", sep = "/"))
      remDr$navigate("https://admin.axis360.org/")
      # OMITTED Sensitive steps
      # ...
      # ...
      # ...
      # ...
      Sys.sleep(5)
    },
    finally = clean_up(driver)
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
So I wanted to throw up a quick, complete example of an RSelenium script that works in RStudio Workbench