Skip to content

Instantly share code, notes, and snippets.

@luisDVA
Created April 7, 2023 17:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save luisDVA/574c849e4956c385c56d10fefc4b60a0 to your computer and use it in GitHub Desktop.
Save luisDVA/574c849e4956c385c56d10fefc4b60a0 to your computer and use it in GitHub Desktop.
Code comments about loaded packages
# exploring comments about loaded packages
library(bigrquery) # CRAN v1.4.1
library(dplyr) # CRAN v1.1.1
library(stringr) # CRAN v1.5.0
library(readr) # CRAN v2.1.4
library(tidyr) # CRAN v1.3.0
library(rlang) # CRAN v1.1.0
library(purrr) # CRAN v1.0.1
# authorize (uncomment when needed) and download the three match tables
# bq_auth()
# matches from .R scripts
RlibmatchesBQtab <- bq_table(
  project = "YOURBQprojectID",
  dataset = "rscriptsonGH",
  table = "libmatches"
)
Rlibmatches <- bq_table_download(RlibmatchesBQtab)
# matches from .Rmd documents
RmdlibmatchesBQtab <- bq_table(
  project = "YOURBQprojectID",
  dataset = "rscriptsonGH",
  table = "Rmdlibmatches"
)
Rmdlibmatches <- bq_table_download(RmdlibmatchesBQtab)
# matches from .qmd documents
QmdlibmatchesBQtab <- bq_table(
  project = "YOURBQprojectID",
  dataset = "rscriptsonGH",
  table = "Qmdlibmatches"
)
Qmdlibmatches <- bq_table_download(QmdlibmatchesBQtab)
# stack the three formats into a single table
libmatches <- list(Rlibmatches, Rmdlibmatches, Qmdlibmatches) %>%
  bind_rows()
# optional cache on disk
# write_csv(libmatches,"data/libmatches.csv")
# reload from the cached file (this replaces the table built above)
libmatches <- read_csv(file = "data/libmatches.csv")
# Reshape the raw matches into one row per library() call, with the call text
# and any trailing comment held in separate columns.
libmatches <- libmatches %>%
  # grab every "library(" occurrence together with the rest of its line
  mutate(libcalls = str_extract_all(content, "library\\(.+")) %>%
  # cleanup inconsistent script parsing: drop content starting with "expected"
  filter(!str_detect(content, "^expected")) %>%
  # one row per extracted call
  unnest_longer(libcalls) %>%
  # text before the first "#" is the call, everything after it the comment
  separate(libcalls, into = c("call", "comment"), sep = "#", extra = "merge") %>%
  mutate(across(everything(), str_trim)) %>%
  # strip a trailing ";" and put each ";"-separated statement on its own row
  mutate(call = str_remove(call, ";$")) %>%
  separate_rows(call, sep = ";") %>%
  mutate(call = str_trim(call)) %>%
  # remove non calls (anything that does not start with "libr")
  filter(str_detect(call, "^libr")) %>%
  # remove possible iterative loading, i.e. calls indexing with [i]
  filter(!str_detect(call, "\\[i\\]"))
# remove unmatched parens caused by wrapped library() calls and other issues
# Readable rewrite of a code golf approach, now using an explicit stack:
# https://codegolf.stackexchange.com/questions/245625/remove-unmatched-brackets
#
# f() treats "<" as the opening and ">" as the closing bracket. It returns `x`
# with every bracket that has no partner removed; all other characters,
# including literal "{" and "}", are left exactly as they were.
#
# x: character vector. NA elements are returned as NA.
# Returns an unnamed character vector of the same length as `x`.
f <- function(x) {
  drop_unmatched <- function(s) {
    if (is.na(s)) {
      return(NA_character_)
    }
    chars <- strsplit(s, "", fixed = TRUE)[[1]]
    keep <- rep(TRUE, length(chars))
    # positions of the "<" that are still waiting for their ">"
    open_at <- integer(length(chars))
    depth <- 0L
    for (i in seq_along(chars)) {
      if (chars[[i]] == "<") {
        depth <- depth + 1L
        open_at[[depth]] <- i
      } else if (chars[[i]] == ">") {
        if (depth > 0L) {
          # closes the most recent open bracket
          depth <- depth - 1L
        } else {
          # nothing to close: this ">" is unmatched
          keep[[i]] <- FALSE
        }
      }
    }
    # whatever is still on the stack never got closed
    keep[open_at[seq_len(depth)]] <- FALSE
    paste(chars[keep], collapse = "")
  }
  vapply(x, drop_unmatched, character(1), USE.NAMES = FALSE)
}
# f() works on "<" / ">" rather than parentheses, so translate the parens,
# drop the unmatched ones call by call, then translate back
libmatches <- libmatches %>%
  mutate(
    call = chartr(")(", "><", call),
    call = map_chr(call, f),
    call = chartr("<>", "()", call)
  )
# parse call arguments (assume 1st is package name)
#
# libcall: a single string holding one call, e.g. "library(dplyr)".
# Returns the first argument of the call as a length-one character vector.
# Raises an error when the text does not parse, is not a call, has no
# arguments, or when the first argument is not a single name or string (for
# example `pkg::name` or `c("a", "b")`). Raising here, rather than returning a
# longer vector, lets a possibly() wrapper turn such calls into its fallback
# value instead of map_chr() aborting on a result that is not length one.
parselibcalls <- function(libcall) {
  # str2lang() only parses the text; nothing is evaluated
  expr <- str2lang(libcall)
  if (!is.call(expr) || length(expr) < 2L) {
    stop("not a call with at least one argument: ", libcall, call. = FALSE)
  }
  pkg <- as.character(expr[[2L]])
  if (length(pkg) != 1L) {
    stop("first argument is not a single name or string: ", libcall,
      call. = FALSE
    )
  }
  pkg
}
# add the parsed package name just before the comment column; calls for which
# parselibcalls() raises an error are labelled "code error"
libmatches <- mutate(
  libmatches,
  pkgname = map_chr(call, possibly(parselibcalls, otherwise = "code error")),
  .before = comment
)
# tidy the comment text: drop one leftover leading "#" (optionally preceded by
# a space), then collapse repeated whitespace
libmatches <- libmatches %>%
  mutate(comment = str_squish(str_remove(comment, "^[ ]?#")))
# optional cache on disk
# write_csv(libmatches,"data/libmatches_processed.csv")
# working copy used by the summaries below
libmatchesproc <- libmatches
# number of distinct files (ids)
n_distinct(libmatchesproc$id)
# distribution of the number of package calls per script
summary(count(libmatchesproc, id)$n)
# proportion with comments
# flag the calls that have a comment
libmatchesproc <- libmatchesproc %>% mutate(hasComment = !is.na(comment))
# per file, the share of calls that are commented, then the distribution of
# those shares across files (the mean of a logical is the proportion of TRUE)
libmatchesproc %>%
  group_by(id) %>%
  summarise(pctTrue = mean(hasComment)) %>%
  pull(pctTrue) %>%
  summary()
# files ranked by their number of package calls
count(libmatchesproc, id) %>%
  arrange(desc(n))
# the 20 most frequent comments, ignoring calls without one
count(libmatches, comment) %>%
  arrange(desc(n)) %>%
  na.omit() %>%
  slice_head(n = 20) %>%
  knitr::kable()
# detect language
library(cld3) # CRAN v1.5.0
# a random sample of 20 commented calls
libmatchesproc %>%
  filter(hasComment) %>%
  sample_n(size = 20) %>%
  select(pkgname, comment) %>%
  knitr::kable()
# keep only the commented calls and tag each comment with its detected language
libcomments <- libmatchesproc %>%
  filter(hasComment) %>%
  mutate(commentLanguage = detect_language(comment))
# with comments, frequency
# the six most frequent detected languages, numeric columns rounded to 2 digits
libcomments %>%
  filter(!is.na(commentLanguage)) %>%
  janitor::tabyl(commentLanguage) %>%
  arrange(-n) %>%
  head() %>%
  # anonymous function instead of across(..., round, 2): passing extra
  # arguments through across() is deprecated since dplyr 1.1.0
  mutate(across(where(is.numeric), \(x) round(x, 2))) %>%
  knitr::kable()
# a sample of 17 distinct comments detected as Spanish
libcomments %>%
  filter(commentLanguage == "es") %>%
  distinct(pkgname, comment) %>%
  sample_n(size = 17) %>%
  knitr::kable()
# one row per package/comment pair (version histories etc. repeat lines)
libcommentsDdup <- distinct(libcomments, pkgname, comment)
# packages with the most distinct comments (top 10, ties kept)
libcommentsDdup %>%
  add_count(pkgname) %>%
  distinct(pkgname, n) %>%
  top_n(n = 10, wt = n) %>%
  arrange(desc(n)) %>%
  knitr::kable()
# comments that open with "for", "para" or "pour": purpose or functions desired
libcommentsDdup %>%
  filter(
    str_detect(comment, regex("^for |^para |^pour", ignore_case = TRUE))
  ) %>%
  sample_n(size = 20) %>%
  knitr::kable()
# possible installation notes
# Each pipeline runs unbroken from the filter to the table. To browse every
# match interactively instead, replace the last two steps with View().
libcomments %>%
  filter(str_detect(comment, regex("instal|CRAN|github", ignore_case = TRUE))) %>%
  sample_n(20) %>%
  knitr::kable()
# remarks about the tidyverse
libcomments %>%
  filter(str_detect(comment, regex("tidyverse", ignore_case = TRUE))) %>%
  sample_n(20) %>%
  knitr::kable()
# viz
library(ggraph) # CRAN v2.0.5
library(tidygraph) # CRAN v1.2.1
library(graphlayouts) # CRAN v0.8.0
library(ggrepel) # CRAN v0.9.1
# keep comments shorter than 37 characters so the labels stay readable
shortcomments <- libcommentsDdup %>%
  mutate(commlength = str_length(comment)) %>%
  filter(commlength < 37)
# packages with more than 10 short comments, 7 random comments from each
shortcommentstop <- shortcomments %>%
  add_count(pkgname) %>%
  filter(n > 10) %>%
  group_by(pkgname) %>%
  sample_n(size = 7)
# five of those packages, picked at random
rndpkgs <- shortcommentstop %>%
  group_by(pkgname) %>%
  summarise() %>%
  sample_n(size = 5) %>%
  pull(pkgname)
# prepare for network structure: package -> comment pairs for the chosen five
fornetwrk <- shortcommentstop %>%
  filter(pkgname %in% rndpkgs) %>%
  select(pkgname, comment)
# build the graph from the package/comment pairs and draw it with repelled
# text labels on the nodes
pkgnet <- as_tbl_graph(fornetwrk)
ggraph(pkgnet, layout = "nicely") +
  geom_edge_link(color = "blue", alpha = 0.2) +
  geom_text_repel(
    aes(
      x, y,
      label = name,
      segment.inflect = TRUE,
      family = "Atkinson Hyperlegible"
    ),
    size = 3
  ) +
  theme_graph()
# ggsave("pkggraph.png", width = 7, height = 5, units = "in")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment