Skip to content

Instantly share code, notes, and snippets.

@mihaiconstantin
Last active May 18, 2024 19:27
Show Gist options
  • Save mihaiconstantin/c468b309402a252a600973513a9d68f4 to your computer and use it in GitHub Desktop.
Save mihaiconstantin/c468b309402a252a600973513a9d68f4 to your computer and use it in GitHub Desktop.
Daily downloads trend for `R` packages published on `CRAN`
# A function to quickly grasp the `CRAN` daily downloads.
#
# Fetches the daily download counts of a `CRAN` package via `cranlogs`, fits a
# smooth trend with `mgcv::gam()`, and draws the daily counts, the trend, and
# the mean daily downloads with `ggplot2`.
#
# Arguments:
#   package           Name of the `CRAN` package, as a single string.
#   x_axis_frequency  Spacing of the x-axis breaks, passed as `date_breaks` to
#                     `scale_x_date()` (e.g., "3 months").
#
# Returns the `ggplot` object invisibly, after printing it.
#
# Side effects: installs any required package that is missing, attaches all
# required packages, and performs network requests.
plot_cran_downloads_trend <- function(package, x_axis_frequency = "3 months") {
  # Fail early on input that cannot be turned into a single package URL.
  is_valid_name <- is.character(package) && length(package) == 1L &&
    !is.na(package) && nzchar(package)
  if (!is_valid_name) {
    stop("`package` must be a single, non-empty string.", call. = FALSE)
  }

  # Packages required.
  packages <- c("cranlogs", "mgcv", "curl", "ggplot2", "scales")

  # Load required packages, installing the missing ones first.
  for (pkg in packages) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg, dependencies = TRUE)
    }
    # Unlike `require()`, `library()` stops right here when the package is
    # still unavailable, instead of failing later with a missing function.
    suppressPackageStartupMessages(library(pkg, character.only = TRUE))
  }

  # Read all lines of a web page. Every connection is closed by the call that
  # opened it, whether or not the read succeeds.
  read_page <- function(url) {
    connection <- curl::curl(url)
    on.exit(close(connection), add = TRUE)
    readLines(connection)
  }

  # Collect the valid dates found in a page (i.e., the first date per line).
  extract_dates <- function(page_text) {
    matches <- regmatches(
      page_text,
      regexpr("\\d{4}-\\d{2}-\\d{2}", page_text)
    )
    # An explicit format turns impossible dates into `NA` instead of an error.
    dates <- as.Date(matches, format = "%Y-%m-%d")
    dates[!is.na(dates)]
  }

  # Specify the `CRAN` archive page.
  base_cran_archive_page <- "https://cran.r-project.org/src/contrib/Archive/"
  # Specify the `CRAN` package page.
  base_cran_package_page <- "https://CRAN.R-project.org/package="
  # Create the archive page.
  cran_archive_page <- paste0(base_cran_archive_page, package, "/")
  # Create the package page.
  cran_package_page <- paste0(base_cran_package_page, package)

  # Try the archive page first. A failure (e.g., `404` `HTTP` status for a
  # package without archived versions) is expected and triggers the fallback.
  archive_dates <- tryCatch(
    extract_dates(read_page(cran_archive_page)),
    error = function(e) NULL
  )

  if (length(archive_dates) > 0L) {
    # The archive is listed by file name, not by date (e.g., version `0.10.0`
    # sorts before `0.6.0`), hence take the earliest date, not the first one.
    publish_date <- min(archive_dates)
  } else {
    # Use the package page when the archive page is not usable.
    package_dates <- tryCatch(
      extract_dates(read_page(cran_package_page)),
      error = function(e) {
        stop(
          "Could not read the CRAN pages of package '", package, "': ",
          conditionMessage(e),
          call. = FALSE
        )
      }
    )
    if (length(package_dates) == 0L) {
      stop(
        "Could not determine the publication date of package '", package, "'.",
        call. = FALSE
      )
    }
    # Extract the first date as the date the package was published on `CRAN`.
    publish_date <- package_dates[1L]
  }

  # Recall what day was yesterday (i.e., usually trackers lag by one day).
  yesterday <- Sys.Date() - 1
  # A package published today has no complete day of downloads yet.
  if (publish_date > yesterday) {
    stop(
      "Package '", package, "' has no download data available yet.",
      call. = FALSE
    )
  }
  # Assume the package has been always on `CRAN` since first published.
  number_days <- as.numeric(yesterday - publish_date)

  # Fetch the daily downloads.
  downloads <- cranlogs::cran_downloads(
    package,
    from = publish_date,
    to = yesterday
  )
  # Compute mean daily downloads.
  mean_daily_downloads <- mean(downloads$count)
  # Estimate trend.
  model <- mgcv::gam(
    count ~ s(as.numeric(date), bs = "cr"),
    data = downloads
  )

  # Place the y-axis breaks at multiples of the mean. Let `ggplot2` pick the
  # breaks when the mean cannot serve as a step (e.g., it is zero).
  if (is.finite(mean_daily_downloads) && mean_daily_downloads > 0) {
    y_breaks <- seq(0, max(downloads$count), by = mean_daily_downloads)
  } else {
    y_breaks <- waiver()
  }

  # Plot.
  trend_plot <- ggplot(downloads, aes(x = date, y = count)) +
    # Daily downloads.
    geom_point(
      color = "gray",
      shape = 19,
      alpha = 0.8,
      size = 2,
      stroke = NA
    ) +
    # The download trend.
    geom_line(
      aes(
        y = fitted(model)
      ),
      color = "darkred",
      linewidth = 1
    ) +
    # The mean daily downloads.
    geom_hline(
      yintercept = mean_daily_downloads,
      color = "black",
      alpha = 0.8,
      linetype = "dashed"
    ) +
    # The download dates.
    scale_x_date(
      date_breaks = x_axis_frequency,
      date_labels = "%b %Y"
    ) +
    # The actual download values.
    scale_y_continuous(
      labels = scales::comma,
      breaks = y_breaks
    ) +
    # The plot text.
    labs(
      title = paste0("Daily downloads of '", package, "' package on CRAN"),
      subtitle = paste0(
        "First published on ", publish_date,
        " (", scales::comma(number_days), " days ago) | ",
        "Total downloads: ", scales::comma(sum(downloads$count)), " | ",
        "Average daily downloads: ",
        scales::comma(round(mean_daily_downloads)), " | ",
        format(Sys.time(), "Plotted on %Y-%m-%d at %H:%M:%S")
      ),
      x = "Date",
      y = "Number of Downloads"
    ) +
    # Theme options.
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1)
    )

  # Plot the plot.
  print(trend_plot)

  # Return the plot silently.
  invisible(trend_plot)
}
# Plot the daily downloads of the `ggplot2`, `Rcpp`, `lme4`, and `lavaan`
# packages, in this order, each paired with its own x-axis break spacing.
invisible(Map(
  plot_cran_downloads_trend,
  c("ggplot2", "Rcpp", "lme4", "lavaan"),
  x_axis_frequency = c("1 year", "1 year", "1 year", "5 months")
))
@mihaiconstantin
Copy link
Author

mihaiconstantin commented May 3, 2023

Example of a resulting plot (i.e., for lavaan in this case):

daily-downloads-lavaan-2023-05-03

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment