Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Calculate the 15 most commonly used R packages across the R scripts in a directory
# Count how often each package is attached with library() across the R
# scripts in a directory, then plot the most common ones as a horizontal bar
# chart showing the proportion of scripts that use each package.

library(ggplot2)
library(scales)

# Settings ----
# Directory with R scripts (searched recursively); the plot is saved here too.
# Paths are built from this value instead of calling setwd(), so the session's
# working directory is left untouched.
scripts_dir <- path.expand("~/Sync/BGU")
# Number of most-used packages to show
top_n <- 15L

# Helpers ----
# Return the unique package names attached with library() in one script,
# given the script's lines of source code.
extract_packages <- function(lines) {
  # Drop trailing comments so commented-out library() calls are not counted
  # (naive: a "#" inside a string literal also truncates the line)
  code <- sub("#.*$", "", lines)
  # Capture only the package name: whitespace and an optional quote (single
  # or double) after "library(" are skipped, and whatever follows the name
  # (closing quote, extra arguments such as `quietly = TRUE`) is ignored
  call_pattern <- "\\blibrary\\(\\s*[\"']?([A-Za-z][A-Za-z0-9.]*)"
  # gregexpr() finds every call, so several calls on one line are all kept
  calls <- unlist(regmatches(code, gregexpr(call_pattern, code, perl = TRUE)))
  unique(sub(call_pattern, "\\1", calls, perl = TRUE))
}

# Prepare data ----
files <- list.files(
  scripts_dir,
  pattern = "\\.R$",
  recursive = TRUE,
  full.names = TRUE
)
if (length(files) == 0L) {
  stop("No .R files found in ", scripts_dir, call. = FALSE)
}
# warn = FALSE: a missing final newline in a script is not worth a warning
pkgs <- unlist(lapply(files, function(path) {
  extract_packages(readLines(path, warn = FALSE))
}))
if (length(pkgs) == 0L) {
  stop("No library() calls found in scripts under ", scripts_dir,
       call. = FALSE)
}
# Each script contributes a package at most once, so the count divided by the
# number of scripts is the proportion of scripts attaching that package
dat <- as.data.frame(table(pkgs) / length(files), stringsAsFactors = FALSE)
colnames(dat) <- c("package", "prop")

# Sort & filter ----
dat <- dat[order(dat$prop, decreasing = TRUE), ]
dat <- head(dat, top_n)
# Reverse the levels so the most-used package is drawn at the top of the axis
dat$package <- factor(dat$package, levels = rev(dat$package))

# Plot ----
# NOTE(review): ggplot2 >= 3.4.0 names the outline width `linewidth`; `size`
# is kept here so the script still runs on older ggplot2 releases.
p <- ggplot(dat, aes(x = prop, y = package)) +
  geom_col(fill = "grey", col = "black", size = 0.2) +
  scale_x_continuous("Scripts (%)", labels = percent) +
  theme_bw() +
  theme(axis.title.y = element_blank())
# Auto-print, so the plot is still displayed when run at top level
p
# Pass the plot explicitly rather than relying on last_plot()
ggsave(
  file.path(scripts_dir, "packages_proportion_plot.png"),
  plot = p,
  width = 4.5,
  height = 4.5
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment