Skip to content

Instantly share code, notes, and snippets.

@daattali
Last active June 24, 2019 09:00
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save daattali/0d2c1d2d8f60480d4da8d6f1487e0941 to your computer and use it in GitHub Desktop.
Save daattali/0d2c1d2d8f60480d4da8d6f1487e0941 to your computer and use it in GitHub Desktop.
Use RSelenium to automatically upload many FASTQ files, submit each to "full analysis" when it's ready, and download the analyzed file on Taxonomer.com
# This script uploads all the FASTQ files to the Taxonomer server (only one file can be uploaded at a time)
# Assumes that you have the RSelenium package installed and that you've got a simple selenium example to work
#
# The setup below is wrapped in `if (FALSE)`, so it never executes when the
# file is run top to bottom. Presumably the placeholders are meant to be filled
# in and the lines run by hand before the upload loop.
if (FALSE) {
  # WHERE ARE THE FILES? Put the directory that holds the FASTQ files here.
  # The placeholder lives in its own variable (not as a comment inside the
  # call) so that the list.files() call below stays syntactically complete.
  fastq_dir <- ""
  # Full paths of every file in `fastq_dir` whose name ends in "fastq.gz"
  fastq_files <- list.files(fastq_dir, pattern = "fastq\\.gz$", full.names = TRUE)

  login_password <- "" # what is my password???

  library(RSelenium)

  # Log in
  # NOTE(review): startServer() may be defunct in newer RSelenium releases -
  # confirm against the installed version.
  startServer()
  remDr <- remoteDriver$new()
  remDr$open()
  remDr$navigate("https://www.taxonomer.com/login")
  usernameEl <- remDr$findElement(using = "id", value = "username")
  usernameEl$sendKeysToElement(list("daattali+taxonomer@gmail.com"))
  passwordEl <- remDr$findElement(using = "id", value = "password")
  # Type the password and press Enter to submit the login form
  passwordEl$sendKeysToElement(list(login_password, key = "enter"))
  # Let element lookups wait up to 5 seconds before failing
  remDr$setImplicitWaitTimeout(milliseconds = 5000)
  Sys.sleep(2)
}
# Upload every file in `fastq_files`, one at a time. For each file: create a
# new analysis named after the file, hand the file to the page's file input,
# then poll until the upload and the "quick analysis" have finished (or an
# error icon shows up, or we run out of patience).
# Uses `fastq_files` and the logged-in `remDr` session from the setup section.
# The result is wrapped in invisible() because the loop is run only for its
# side effects; every element of the returned list is NULL.
invisible(lapply(fastq_files, function(fastq) {
  cat("Uploading ", fastq, "\n")
  fastq <- normalizePath(fastq)

  # TRUE if an element matching the CSS selector can be found, FALSE otherwise.
  # findElement() signals an error when nothing matches; that is turned into
  # FALSE here instead of flipping a flag from inside an error handler.
  element_exists <- function(selector) {
    tryCatch({
      remDr$findElement(using = "css selector", value = selector)
      TRUE
    }, error = function(err) FALSE)
  }

  # Go to the Analyses page
  remDr$navigate("https://www.taxonomer.com/analyses/new")
  Sys.sleep(10)

  # Fill out the name and continue
  nameEl <- remDr$findElement(using = "id", value = "analysis-name")
  nameEl$sendKeysToElement(list(basename(fastq), key = "enter"))
  Sys.sleep(10)

  # Make the file input visible because selenium won't be able to interact with it while hidden.
  # The script picks the second `input[type=file]` on the page (index 1) and
  # tags it with the class "deanclass" so it can be located afterwards.
  jsscript <- '
var input = $("input[type=file]")[1];
input.style = "";
input.className = "deanclass";
input.parentElement.style = "";
input.parentElement.parentElement.parentElement.className = "";
'
  remDr$executeScript(jsscript, args = list())
  Sys.sleep(3)
  inputEl <- remDr$findElement(using = "css selector", value = ".deanclass")

  # Submit the file
  inputEl$sendKeysToElement(list(fastq))

  # Now that the file is uploading, need to constantly check when the upload is done
  # and when the "quick analysis" is done.
  wait <- 30 # how long (seconds) to wait before checking the status of the current upload
  maxWait <- 60 # how long (minutes) until giving up on uploading a file
  maxAttempts <- 60 * maxWait / wait # don't change this - it calculates when to give up

  attempt <- 1
  repeat {
    cat("Attempt #", attempt, "\t")
    Sys.sleep(wait)

    # See if the error icon is visible
    if (element_exists("#quick-analysis i.fa-times")) {
      cat("ERROR: An error occurred with the quick analysis\n")
      break
    }

    # See if the progress bar is still present or not. Any failure while
    # inspecting the bar counts as "not done yet" and we simply poll again.
    upload_done <- tryCatch({
      progressBar <- remDr$findElement(using = "css selector", value = "#progress-bar>.progress>.progress-bar")
      progress <- progressBar$getElementAttribute("aria-valuenow")[[1]]
      cat("Progress at ", progress, "\t")
      !progressBar$isElementDisplayed()[[1]]
    }, error = function(err) FALSE)

    if (isTRUE(upload_done)) {
      cat("Progress is done!\t")
      # See if the quick analysis finished: it is considered done once the
      # spinner icon can no longer be found. The wait is capped at
      # `maxAttempts` checks so a stuck spinner cannot hang the script forever.
      quickChecks <- 1
      repeat {
        if (!element_exists("#quick-analysis i.fa-spinner")) {
          cat("done processing\t")
          cat("SUCCESS: Quick analysis done\t")
          break
        }
        cat("still processing...\t")
        if (quickChecks >= maxAttempts) {
          cat("ERROR: Quick analysis is taking too long, giving up\n")
          break
        }
        quickChecks <- quickChecks + 1
        Sys.sleep(wait)
      }
      break
    }

    # Give up after exactly `maxAttempts` polls (i.e. roughly `maxWait` minutes)
    if (attempt >= maxAttempts) {
      cat("ERROR: Tried for too long, giving up\n")
      break
    }
    attempt <- attempt + 1
  }
  cat("\n")
}))
# This script submits analyses to "Full Analysis"
# (only 3 analyses can go on at the same time, so need to constantly check how many are being processed)
#
# The login sequence below is guarded by `if (FALSE)`, so it is skipped when
# the file is run top to bottom.
if (FALSE) {
  login_password <- "" # what is my password???

  library(RSelenium)

  # Start a browser session and open the login page
  # NOTE(review): startServer() may be defunct in newer RSelenium releases -
  # confirm against the installed version.
  startServer()
  remDr <- remoteDriver$new()
  remDr$open()
  remDr$navigate("https://www.taxonomer.com/login")

  # Fill in the credentials; the trailing Enter key submits the form
  usernameEl <- remDr$findElement(using = "id", value = "username")
  usernameEl$sendKeysToElement(list("daattali+taxonomer@gmail.com"))
  passwordEl <- remDr$findElement(using = "id", value = "password")
  passwordEl$sendKeysToElement(list(login_password, key = "enter"))

  # Element lookups may wait up to 5 seconds before failing
  remDr$setImplicitWaitTimeout(milliseconds = 5000)
  Sys.sleep(2)
}
# Keep submitting unprocessed samples to "Full Analysis" until none are left.
# Each pass: open the analyses list, wait for a free slot, jump to the oldest
# unprocessed sample, and submit it. Uses the logged-in `remDr` session.
repeat {
  remDr$navigate("https://www.taxonomer.com/analyses")
  Sys.sleep(20)

  # Wait until fewer than 3 jobs are queued or being analyzed
  repeat {
    # Count how many jobs are queued or being analyzed currently
    waiting <- length(remDr$findElements(using = "css selector", value = "#page .panel table td button.btn-warning"))
    analyzing <- length(remDr$findElements(using = "css selector", value = "#page .panel table td .fa-spinner.fa-pulse"))
    cat(waiting, " in queue and ", analyzing, " processing ")

    if (waiting + analyzing < 3) {
      break
    }
    # there's already 3 items in the queue, so try again in one minute
    remDr$refresh()
    Sys.sleep(60)
  }

  # In javascript, walk the table rows in reverse order and go to the page of
  # the first row whose fourth cell contains no icon (treated as unprocessed)
  jsscript <- '
var rows = $("#page .panel table tbody tr").get().reverse();
var foundlink = false;
$.each(rows, function(idx) {
if (foundlink) return false;
var row = rows[idx];
var icons = row.getElementsByTagName("td")[3].getElementsByTagName("i");
if(icons.length == 0) {
var link = row.getElementsByTagName("a")[0].href;
window.location = link;
foundlink = true;
}
})'
  remDr$executeScript(jsscript, args = list())

  # If the URL hasn't changed, it means there are no more samples to process
  Sys.sleep(10)
  if (remDr$getCurrentUrl()[[1]] == "https://www.taxonomer.com/analyses") {
    cat("NO MORE SAMPLES TO PROCESS!\n")
    break
  }

  # Report which sample page we landed on
  name <- remDr$findElement(using = "id", value = "sample-names")$getElementText()[[1]]
  cat("On page of sample:", name, "\t")

  # Submit to full analysis, then confirm in the modal dialog
  remDr$findElement(using = "id", value = "full-analysis")$clickElement()
  Sys.sleep(10)
  tryCatch(
    {
      remDr$findElement(using = "css selector", value = "#modal-dialog .modal-footer .btn-primary")$clickElement()
      cat("SUCCESS: File submitted for Full Analysis\n")
      Sys.sleep(5)
    },
    error = function(err) {
      # A failed confirmation is reported and the loop moves on
      cat("ERROR: Error trying to submit file to full analysis\n")
    }
  )
}
# This script downloads all the finished analysis files
#
# The setup below is guarded by `if (FALSE)`, so it is skipped when the file
# is run top to bottom.
if (FALSE) {
  fastq_files <- c(
    # NAMES OF THE FILES TO DOWNLOAD
  )
  login_password <- "" # what is my password???

  ###################

  library(RSelenium)

  # Start a browser session and open the login page
  # NOTE(review): startServer() may be defunct in newer RSelenium releases -
  # confirm against the installed version.
  startServer()
  remDr <- remoteDriver$new()
  remDr$open()
  remDr$navigate("https://www.taxonomer.com/login")

  # Fill in the credentials; the trailing Enter key submits the form
  usernameEl <- remDr$findElement(using = "id", value = "username")
  usernameEl$sendKeysToElement(list("daattali+taxonomer@gmail.com"))
  passwordEl <- remDr$findElement(using = "id", value = "password")
  passwordEl$sendKeysToElement(list(login_password, key = "enter"))

  # Element lookups may wait up to 5 seconds before failing
  remDr$setImplicitWaitTimeout(milliseconds = 5000)
  Sys.sleep(2)
}
# For every name in `fastq_files`, open that sample's analysis page and click
# through the buttons that trigger the results download.
# Uses the logged-in `remDr` session from the setup section.
lapply(fastq_files, function(fastq) {
  # Click the element with the given id, then pause for `pause` seconds
  click_by_id <- function(id, pause) {
    remDr$findElement(using = "id", value = id)$clickElement()
    Sys.sleep(pause)
  }

  cat("Downloading ", fastq, "\n")

  # Open the analyses list and follow the link whose text contains the name
  remDr$navigate("https://www.taxonomer.com/analyses")
  Sys.sleep(20)
  analysis_link <- remDr$findElement(using = "partial link text", value = fastq)
  analysis_url <- analysis_link$getElementAttribute("href")[[1]]
  remDr$navigate(analysis_url)
  Sys.sleep(20)

  # Click, in order: the full analysis button, the download button, and the
  # final download control
  click_by_id("full-analysis", 5)
  click_by_id("download-results", 3)
  click_by_id("taxonomerDownload", 5)
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment