Skip to content

Instantly share code, notes, and snippets.

@MichaelChirico
Created January 26, 2024 15:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MichaelChirico/247ea9500460dca239f031e74bdcf76b to your computer and use it in GitHub Desktop.
Save MichaelChirico/247ea9500460dca239f031e74bdcf76b to your computer and use it in GitHub Desktop.
readChar() usage on CRAN
library(jsonlite)
library(data.table)
library(lintr)
# Fetch one page of GitHub code-search results and return the parsed JSON.
#
# The search query (readChar, org:cran, language:R, -path:.Rd, 100 hits per
# page) is hard-coded in the URL; only the page number varies.
#
# @param page Page number, substituted into the URL with "%d".
# @return Whatever jsonlite::fromJSON() parses out of the curl response.
read_page <- function(page) {
  response_file <- tempfile()
  on.exit(unlink(response_file))

  # The embedded quotes are part of the arguments: system2() hands the
  # arguments to a shell, so the URL and header values must arrive quoted.
  search_url <- sprintf("'https://api.github.com/search/code?q=readChar+org:cran+language:R+-path:.Rd&per_page=100&page=%d'", page)
  auth_header <- sprintf('"Authorization: Bearer %s"', Sys.getenv("GITHUB_PAT"))
  version_header <- '"X-GitHub-Api-Version: 2022-11-28"'

  curl_args <- c(
    "--location",
    "--request", "GET", search_url,
    "--header", auth_header,
    "--header", version_header
  )

  # curl's stdout (the JSON body) is redirected into the temp file.
  system2("curl", curl_args, stdout = response_file)
  jsonlite::fromJSON(response_file)
}
# Walk the paginated search results, collecting one (file_path, repo)
# data.frame per page, then stack them and derive the raw-content URL.

# GitHub's search API only serves the first 1,000 matches (10 pages at
# per_page=100); requesting later pages yields an error payload, not items.
max_pages <- 10L
n_pages <- Inf # unknown until the first response reports total_count
page <- 1L
results <- list()
while (page <= n_pages) {
  Sys.sleep(0.2) # rate-limiting
  page_data <- read_page(page)
  if (page == 1L) {
    # An error payload (bad token, rate limit, ...) carries no total_count.
    # Fail with the API's message rather than letting the loop condition
    # choke on a zero-length n_pages.
    if (is.null(page_data$total_count)) {
      stop(
        "GitHub search returned no 'total_count': ",
        toString(page_data$message),
        call. = FALSE
      )
    }
    n_pages <- min(ceiling(page_data$total_count / 100), max_pages)
  }
  if (!is.data.frame(page_data$items)) {
    # Nothing usable on this page: either the results ran out or the API
    # answered with an error payload. Report the latter, then stop paging.
    if (!is.null(page_data$message)) {
      warning(
        "Stopping at page ", page, ": ", toString(page_data$message),
        call. = FALSE
      )
    }
    break
  }
  # Store under the current page number *before* advancing, so slot k holds
  # page k (previously slot 1 was always left NULL).
  results[[page]] <- data.frame(
    file_path = page_data$items$path,
    repo = page_data$items$repository$full_name
  )
  page <- page + 1L
}
results <- rbindlist(results)
# NOTE(review): the URL hard-codes the "master" branch; repos whose default
# branch differs will not resolve -- confirm this holds for the searched org.
results[, raw_url := sprintf("https://raw.githubusercontent.com/%s/master/%s", repo, file_path)]
setorder(results, raw_url)
# XPath template: starting from a readChar() call's function-name expr, select
# any following sibling expr (i.e. an argument) that has a child expr matching
# the condition substituted for %s.
full_file_xpath_fmt <- paste0(
  "\n",
  "//SYMBOL_FUNCTION_CALL[text() = 'readChar']\n",
  "/parent::expr\n",
  "/following-sibling::expr[expr[%s]]\n"
)

# Flags every call to readChar().
read_char_linter <- undesirable_function_linter(
  c(readChar = "any readChar() usage")
)

# Flags readChar() calls with an argument that is itself a file.size() call.
file_size_linter <- make_linter_from_xpath(
  xpath = sprintf(
    full_file_xpath_fmt,
    "SYMBOL_FUNCTION_CALL[text() = 'file.size']"
  ),
  lint_message = "readChar() with file.size()"
)

# Flags readChar() calls with an argument one level of nesting deeper around a
# file.info() call (the message names the file.info()$size idiom).
file_info_linter <- make_linter_from_xpath(
  xpath = sprintf(
    full_file_xpath_fmt,
    "expr/SYMBOL_FUNCTION_CALL[text() = 'file.info']"
  ),
  lint_message = "readChar() with file.info()$size"
)
# Download every matched file and lint it with the three readChar() linters,
# producing one stacked table of lints tagged with the originating raw_url.
# Files that cannot be downloaded are skipped with a warning instead of
# aborting the whole run; rbindlist() ignores the NULLs they return.
lint_info <- rbindlist(lapply(results$raw_url, function(url) {
  Sys.sleep(0.2) # space out requests
  tmp <- tempfile()
  # Register cleanup before the download so a partial file is removed too.
  on.exit(unlink(tmp), add = TRUE)

  downloaded <- tryCatch(
    # download.file() returns 0 on success; anything else is a failure.
    isTRUE(download.file(url, tmp, quiet = TRUE) == 0L),
    error = function(e) {
      warning("Skipping ", url, ": ", conditionMessage(e), call. = FALSE)
      FALSE
    }
  )
  if (!downloaded) {
    return(NULL)
  }

  # The list names become the values of the 'linter' column in the output.
  lints <- as.data.table(lint(
    tmp,
    linters = list(
      n_calls = read_char_linter,
      n_file_size = file_size_linter(),
      n_file_info = file_info_linter()
    )
  ))
  lints[, raw_url := url]
  lints
}))
# Count lints per file and linter (one column per linter name), leaving out
# rows whose linter is "error", then attach the counts to the search results.
# merge() keeps only raw_urls present on both sides.
results <- merge(
  x = results,
  y = dcast(
    data = lint_info[linter != "error"],
    formula = raw_url ~ linter,
    fun.aggregate = length
  ),
  by = "raw_url"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment