Last active
May 18, 2024 19:27
-
-
Save mihaiconstantin/c468b309402a252a600973513a9d68f4 to your computer and use it in GitHub Desktop.
Daily downloads trend for `R` packages published on `CRAN`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the daily `CRAN` downloads of a package together with a smooth trend.
#
# The function looks up the date the package first appeared on `CRAN` (from
# the archive listing, or from the package page when there is no archive),
# fetches the daily download counts from that date until yesterday via
# `cranlogs::cran_downloads()`, fits a smooth trend with `mgcv::gam()`, and
# draws the result with `ggplot2`.
#
# Arguments:
#   package           A single, non-empty character string naming the package.
#   x_axis_frequency  Spacing of the x-axis breaks, passed as `date_breaks` to
#                     `scale_x_date()` (e.g., "3 months" or "1 year").
#
# Returns the `ggplot` object invisibly; the plot is printed as a side effect.
# Missing required packages are installed on the fly. An error is raised when
# `package` is invalid, a required package cannot be loaded, or no publication
# date can be determined.
plot_cran_downloads_trend <- function(package, x_axis_frequency = "3 months") {
  # Validate the input first, so bad calls fail before any install or network
  # activity takes place.
  if (!is.character(package) || length(package) != 1L ||
    is.na(package) || !nzchar(package)) {
    stop(
      "`package` must be a single non-empty character string.",
      call. = FALSE
    )
  }

  # Packages required.
  packages <- c("cranlogs", "mgcv", "curl", "ggplot2", "scales")

  # Load the required packages, installing the ones that are missing.
  for (pkg in packages) {
    loaded <- suppressPackageStartupMessages(
      require(pkg, character.only = TRUE)
    )

    if (!loaded) {
      utils::install.packages(pkg, dependencies = TRUE)
      loaded <- suppressPackageStartupMessages(
        require(pkg, character.only = TRUE)
      )
    }

    # Do not continue with a half-loaded toolchain.
    if (!loaded) {
      stop(
        "Required package '", pkg, "' could not be installed or loaded.",
        call. = FALSE
      )
    }
  }

  # Read all lines of a web page. Each call owns its connection and closes it
  # exactly once, whether or not the read succeeds.
  read_page <- function(url) {
    connection <- curl(url)
    on.exit(close(connection), add = TRUE)
    readLines(connection)
  }

  # Extract the valid ISO dates (at most one per line) found in a page.
  extract_dates <- function(page_text) {
    if (is.null(page_text)) {
      return(as.Date(character(0)))
    }
    found <- regmatches(page_text, regexpr("\\d{4}-\\d{2}-\\d{2}", page_text))
    dates <- as.Date(found, format = "%Y-%m-%d")
    dates[!is.na(dates)]
  }

  # Specify the `CRAN` archive page.
  base_cran_archive_page <- "https://cran.r-project.org/src/contrib/Archive/"

  # Specify the `CRAN` package page.
  base_cran_package_page <- "https://CRAN.R-project.org/package="

  # Create the archive page.
  cran_archive_page <- paste0(base_cran_archive_page, package, "/")

  # Create the package page.
  cran_package_page <- paste0(base_cran_package_page, package)

  # Try the archive page first. The listing is sorted by file name rather
  # than by date (e.g., `0.10.0` sorts before `0.6.0`), so the publication
  # date is the earliest date listed, not the first one.
  archive_dates <- extract_dates(
    tryCatch(read_page(cran_archive_page), error = function(e) NULL)
  )

  if (length(archive_dates) > 0L) {
    publish_date <- min(archive_dates)
  } else {
    # Use the package page when the archive is unavailable (e.g., `404` `HTTP`
    # status for packages with a single release).
    package_dates <- extract_dates(
      tryCatch(
        read_page(cran_package_page),
        error = function(e) {
          stop(
            "Could not read the CRAN pages for package '", package, "': ",
            conditionMessage(e),
            call. = FALSE
          )
        }
      )
    )

    if (length(package_dates) == 0L) {
      stop(
        "Could not determine the CRAN publication date of package '",
        package, "'.",
        call. = FALSE
      )
    }

    # The first date on the package page is taken as the publication date.
    publish_date <- package_dates[1L]
  }

  # Recall what day was yesterday (i.e., usually trackers lag by one day).
  yesterday <- Sys.Date() - 1

  # Assume the package has been always on `CRAN` since first published.
  number_days <- as.numeric(yesterday - publish_date)

  # Fetch the daily downloads.
  downloads <- cran_downloads(package, from = publish_date, to = yesterday)

  # Compute mean daily downloads.
  mean_daily_downloads <- mean(downloads$count)

  # Estimate trend.
  model <- gam(count ~ s(as.numeric(date), bs = "cr"), data = downloads)

  # Space the y-axis breaks by the mean; fall back to the default breaks when
  # the mean is not a usable step (e.g., a package without any downloads).
  if (is.finite(mean_daily_downloads) && mean_daily_downloads > 0) {
    y_breaks <- seq(0, max(downloads$count), by = mean_daily_downloads)
  } else {
    y_breaks <- waiver()
  }

  # Plot.
  trend_plot <- ggplot(downloads, aes(x = date, y = count)) +
    # Daily downloads.
    geom_point(
      color = "gray",
      shape = 19,
      alpha = 0.8,
      size = 2,
      stroke = NA
    ) +
    # The download trend.
    geom_line(
      aes(
        y = fitted(model)
      ),
      color = "darkred",
      linewidth = 1
    ) +
    # The mean daily downloads.
    geom_hline(
      yintercept = mean_daily_downloads,
      color = "black",
      alpha = 0.8,
      linetype = "dashed"
    ) +
    # The download dates.
    scale_x_date(
      date_breaks = x_axis_frequency,
      date_labels = "%b %Y"
    ) +
    # The actual download values.
    scale_y_continuous(
      labels = scales::comma,
      breaks = y_breaks
    ) +
    # The plot text.
    labs(
      title = paste0("Daily downloads of '", package, "' package on CRAN"),
      subtitle = paste0(
        "First published on ", publish_date,
        " (", scales::comma(number_days), " days ago) | ",
        "Total downloads: ", scales::comma(sum(downloads$count)), " | ",
        "Average daily downloads: ",
        scales::comma(round(mean_daily_downloads)), " | ",
        format(Sys.time(), "Plotted on %Y-%m-%d at %H:%M:%S")
      ),
      x = "Date",
      y = "Number of Downloads"
    ) +
    # Theme options.
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1)
    )

  # Draw the plot.
  print(trend_plot)

  # Return the plot silently.
  invisible(trend_plot)
}
# Example usage: draw the daily downloads trend for `ggplot2`, `Rcpp`, `lme4`,
# and `lavaan`, in that order. Each package is paired with the x-axis break
# spacing used for its plot.
invisible(
  Map(
    function(package, frequency) {
      plot_cran_downloads_trend(package, x_axis_frequency = frequency)
    },
    c("ggplot2", "Rcpp", "lme4", "lavaan"),
    c("1 year", "1 year", "1 year", "5 months")
  )
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example of a resulting plot (in this case, for the `lavaan` package):