Skip to content

Instantly share code, notes, and snippets.

@wpetry
Created October 6, 2021 17:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wpetry/e3bcf2173aa5a081f5ba78f242155786 to your computer and use it in GitHub Desktop.
Save wpetry/e3bcf2173aa5a081f5ba78f242155786 to your computer and use it in GitHub Desktop.
Download search results from a Web of Science web query
#################################################-
## Download search results from a Web of Science web query ----
## W.K. Petry
##
## example usage:
## get_wos_query(url = "https://www.webofscience.com/wos/woscc/summary/83c53a2b-5a39-4468-84f0-c9ffcfba5e91-01b947ba/relevance/1", profile = fprof)
#################################################-
## Define function to fetch WoS query hits ----
#################################################-
#' Download all hits of a Web of Science query as RIS files
#'
#' Drives a Selenium-controlled browser through the Web of Science export
#' dialog and exports the full record of every hit, in batches of at most
#' 1000 records per export. The files are saved wherever the browser profile
#' sends its downloads.
#'
#' @param url URL of a Web of Science query result (summary) page.
#' @param browser Browser to drive: "firefox" (default), "chrome" or
#'   "phantomjs".
#' @param profile Optional extra capabilities handed to
#'   `RSelenium::rsDriver()` as `extraCapabilities`, e.g. a profile created
#'   with `RSelenium::makeFirefoxProfile()`.
#' @return Called for its side effect (the downloaded files). Returns the
#'   result of stopping the Selenium server.
get_wos_query <- function(url, browser = c("firefox", "chrome", "phantomjs"),
                          profile = NULL) {
  # require() only returns FALSE when the package is missing; fail loudly.
  if (!requireNamespace("RSelenium", quietly = TRUE)) {
    stop("Package 'RSelenium' is required but is not installed.",
         call. = FALSE)
  }
  browser <- match.arg(browser)

  # Keep the driver local (no global assignment) and guarantee that the
  # browser and the Selenium server are shut down even if a step below fails.
  rD <- RSelenium::rsDriver(browser = browser, extraCapabilities = profile)
  remDr <- rD[["client"]]
  server_stopped <- FALSE
  on.exit({
    if (!server_stopped) {
      try(remDr$close(), silent = TRUE)
      try(rD$server$stop(), silent = TRUE)
    }
  }, add = TRUE)
  Sys.sleep(2)

  # Small helpers so each step below reads as a single action.
  find_xpath <- function(xpath) {
    remDr$findElement(using = "xpath", value = xpath)
  }
  click_xpath <- function(xpath) {
    find_xpath(xpath)$clickElement()
  }

  # go to the query result page
  remDr$navigate(url)
  Sys.sleep(2)

  # Close the onboarding popups. Their element IDs are hard-coded and a popup
  # may not be shown, so a missing popup is reported and skipped, not fatal.
  popup_ids <- c("pendo-close-guide-8fdced48",
                 "pendo-button-e580fcec",
                 "pendo-button-506b4382")
  popup_pauses <- c(2, 2.21, 1)
  for (k in seq_along(popup_ids)) {
    tryCatch(
      click_xpath(sprintf('//*[@id="%s"]', popup_ids[[k]])),
      error = function(e) {
        message("Popup '", popup_ids[[k]], "' not found; continuing.")
      }
    )
    Sys.sleep(popup_pauses[[k]])
  }

  # find number of records to be exported
  count_xpath <- paste0(
    "/html/body/app-wos/div/div/main/div/app-input-route/",
    "app-base-summary-component/app-search-friendly-display/div[1]/",
    "app-general-search-friendly-display/h1/span"
  )
  count_text <- find_xpath(count_xpath)$getElementText()[[1]]
  # gsub() removes every thousands separator ("1,234,567"), not just the first.
  nrecs <- suppressWarnings(
    as.integer(gsub(",", "", count_text, fixed = TRUE))
  )
  if (is.na(nrecs)) {
    stop("Could not read the number of records from '", count_text, "'.",
         call. = FALSE)
  }

  # First record of each batch; empty when the query has no hits, because
  # seq() would otherwise error on a descending range.
  if (nrecs >= 1L) {
    batch_starts <- seq.int(1L, nrecs, by = 1000L)
  } else {
    message("The query returned no records; nothing to export.")
    batch_starts <- integer(0)
  }

  # XPath prefix shared by all controls inside the export overlay.
  overlay <- paste0(
    "/html/body/app-wos/div/div/main/app-input-route/app-export-overlay/",
    "div/div[3]/div[2]/app-export-out-details/div/div[2]/div"
  )
  export_menu_xpath <- paste0(
    "/html/body/app-wos/div/div/main/div/app-input-route/",
    "app-base-summary-component/div/div[2]/app-page-controls[1]/div/",
    "app-export-option/div/app-export-menu/div/button/span[1]"
  )

  for (first_rec in batch_starts) {
    # Later batches start again from a fresh result page.
    if (first_rec != 1L) {
      remDr$navigate(url)
    }
    Sys.sleep(3)
    # set upper record number (limit is 1000 records), capped at the total
    last_rec <- min(first_rec + 999L, nrecs)

    # begin export
    click_xpath(export_menu_xpath)
    Sys.sleep(1)
    # select output format as RIS
    click_xpath('//*[@id="exportToRisButton"]')
    Sys.sleep(1)
    # choose which records to export (the "records from ... to ..." option)
    click_xpath(paste0(
      overlay,
      "/fieldset/mat-radio-group/div[3]/mat-radio-button/label/span[1]/span[1]"
    ))
    Sys.sleep(1)
    # clear the default start/end records
    find_xpath('//*[@id="mat-input-0"]')$clearElement()
    Sys.sleep(1)
    find_xpath('//*[@id="mat-input-1"]')$clearElement()
    Sys.sleep(1)
    # set the record range; "%d" avoids scientific notation such as "1e+05"
    find_xpath('//*[@id="mat-input-0"]')$sendKeysToElement(
      list(sprintf("%d", first_rec))
    )
    Sys.sleep(1)
    find_xpath('//*[@id="mat-input-1"]')$sendKeysToElement(
      list(sprintf("%d", last_rec))
    )
    Sys.sleep(1)
    # select 'Full Record' for export
    click_xpath(paste0(overlay, "/div[1]/wos-select/button/span[1]"))
    Sys.sleep(1)
    click_xpath(paste0(
      overlay, "/div[1]/wos-select/div/div/div[2]/div[3]/span"
    ))
    Sys.sleep(1)
    # start the download and give it time to finish
    click_xpath(paste0(overlay, "/div[2]/button[1]"))
    Sys.sleep(10)
  }

  # Normal shutdown: close the browser, then stop the server.
  try(remDr$close(), silent = TRUE)
  server_stopped <- TRUE
  rD$server$stop()
}
#################################################-
# set profile options to avoid download dialog --
#################################################-
# Firefox profile that saves exports straight to disk without a download
# dialog. Firefox expects an absolute directory in browser.download.dir and
# does not perform shell-style "~" expansion, so the path is expanded in R.
fprof <- RSelenium::makeFirefoxProfile(list(
  browser.download.dir = path.expand("~/Downloads"),
  # 2 = save to the custom directory given in browser.download.dir
  browser.download.folderList = 2L,
  browser.download.manager.showWhenStarting = FALSE,
  # MIME type(s) that are saved to disk without asking
  browser.helperApps.neverAsk.saveToDisk = "application/json"
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment