Skip to content

Instantly share code, notes, and snippets.

@r2evans
Created January 21, 2022 04:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save r2evans/ed0d132166bfbd9473d99b4fac7d65db to your computer and use it in GitHub Desktop.
Save r2evans/ed0d132166bfbd9473d99b4fac7d65db to your computer and use it in GitHub Desktop.
#' Convert an StackOverflow URL to a local filename
#'
#' @param url character string, typically starting with
#' "https://stackoverflow.com/..."
#' @param dir logical, whether this should be a directory or a file
#' @param ext character, file extension
#' @param create logical, whether to create it or just report on it
#' @param force logical, whether to resort to a question number if the
#' user cannot be found
#' @param base character, the path under which all others will be
#' based
#' @param clip logical, whether to attempt to write to the clipboard
#' with 'writeLines'
#' @return character string for the file for code, and the file has
#' been created and pre-populated with the URL on the first line
#' @export
SE <- function(url, dir = FALSE, ext = if (dir) "" else ".R",
create = TRUE, force = FALSE, base = "~/StackOverflow",
clip = interactive()) {
if (!requireNamespace("rvest")) {
stop("'rvest' package is not available")
}
if (missing(url)) {
# let's try the clipboard
if (clip) {
url <- suppressWarnings(readLines("clipboard"))
}
if (!is.character(url) || !nzchar(url) || !grepl("^http", url)) {
stop("argument url is missing, and the clipboard does not start with 'http'")
}
}
# remove sublink anchor refs
url <- gsub("#[^#]*$", "", url)
question_number <- gsub(".*/questions/(\\d+)/.*", "\\1", url)
if (!nzchar(question_number) || grepl("\\D", question_number)) {
stop("unable to find question number: ", url)
}
hand <- try(xml2::read_html(url), silent = TRUE)
if (inherits(hand, "try-error")) {
stop("unable to read the URL")
}
author <- try({
rvest::html_attr(
rvest::html_nodes(rvest::html_nodes(hand, ".question .user-info .user-details"), "a"),
"href")}, silent = TRUE)
# I think these list editors, too ...
if (inherits(author, "try-error") || length(author) < 1 || !is.character(author) || !nzchar(author)) {
stop("unable to find the author: ", as.character(author))
}
if (length(author) > 1L) {
warning("multiple authors: ", paste(author, collapse = ", "))
author <- tail(author, n = 1)
}
author_number <- gsub("/users/(\\d+)/?.*", "\\1", author)
author_name <- gsub("/users/\\d+/", "", author)
if (grepl("\\D", author_number)) {
msg <- paste("malformed author:", author)
if (force) warning(msg) else stop(msg)
author <- "questions"
}
if (grepl("/", author_name) || !nzchar(author_name)) {
warning("malformed author name: ", sQuote(author_name))
author_name <- "(unk)"
}
path <- file.path(base, author_number, question_number, fsep = "/")
if (dir) {
path <- file.path(path, "question", fsep = "/")
}
if (!anyNA(ext) && nzchar(ext)) path <- paste0(path, ext)
if (create) {
dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
}
if (clip) {
suppressWarnings(writeLines(path, "clipboard", sep = ""))
}
if (create) {
append <- TRUE
contents <- character(0)
if (file.exists(path)) {
partialurl <- gsub("^(.*/questions/[0-9]+/).*", "\\1", url)
# somewhat inefficient but hopefully sufficient
alllines <- gsub("^[# ]*", "", readLines(path))
if (partialurl %in% substr(alllines, 1, nchar(partialurl))) {
append <- FALSE
warning("existing question, no change")
} else {
append <- TRUE
warning("user file exists, appending new question")
contents <- c(contents, "\n ")
}
}
contents <- c(contents, paste("#", url), "")
if (append) write(contents, path, append = TRUE)
}
message("URL : ", url)
message("Author : ", author_number, ", ", author_name)
message("Question: ", question_number)
message("Path : ", path)
banned <- file.path(base, author_number, "banned")
if (file.exists(banned)) {
warning("\n### BANNED", call. = FALSE)
cat(paste("###", readLines(banned, warn = FALSE), collapse = "\n"), "\n")
}
invisible(path)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment