Skip to content

Instantly share code, notes, and snippets.

@sfirke
Last active March 13, 2021 19:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sfirke/03ae11b124071ad9c2a5e960e590b8d8 to your computer and use it in GitHub Desktop.
Save sfirke/03ae11b124071ad9c2a5e960e590b8d8 to your computer and use it in GitHub Desktop.
Analysis of janitor downloads
# Exploring download counts of a single package
# Attach the packages used throughout the script before any of them is needed.
library(tidyverse)
library(lubridate)
library(tntpr) # from devtools::install_github("tntp/tntpr")

# Daily CRAN download counts for janitor over a fixed date window.
x <- cranlogs::cran_downloads(
  "janitor",
  from = "2016-10-03",
  to = "2021-03-12"
)

# Numeric day of the week for each date.
x[["wday"]] <- wday(x[["date"]])

# Days numbered 1 or 7 are labelled "Weekend" (Sunday and Saturday under
# lubridate's default week start); every other day is a "Weekday".
x[["weekday"]] <- ifelse(x[["wday"]] %in% c(1, 7), "Weekend", "Weekday")

# Segment the dates into years using a cutoff date.
x[["year"]] <- tntpr::date_to_sy(x[["date"]], as.Date("2016-10-02"))
# Daily janitor downloads over time, coloured by weekday vs. weekend.
# Layers are added in order: smoothed trend first, then the raw daily points.
ggplot(data = x, mapping = aes(x = date, y = count, color = weekday)) +
  geom_smooth() +
  geom_point() +
  # Faint vertical reference lines at a fixed set of dates.
  # NOTE(review): presumably janitor release dates or similar milestones -
  # confirm and label them.
  geom_vline(
    xintercept = as.Date(c(
      "2016-10-03",
      "2016-10-31",
      "2017-05-06",
      "2018-01-04",
      "2018-03-22",
      "2018-07-18",
      "2018-07-31",
      "2019-04-20",
      "2020-01-22",
      "2020-04-08",
      "2020-04-12",
      "2021-01-05"
    )),
    alpha = 0.4
  ) +
  theme_minimal() +
  labs(x = "Date", y = "Daily Downloads", color = "")
# Total downloads per year segment, plus each segment's total as a ratio of
# the previous segment's total (the first row has no predecessor, so its
# growth is NA).
x %>%
  group_by(year) %>%
  summarise(dls = sum(count)) %>%
  mutate(growth = dls / dplyr::lag(dls))
# Rank and percentile vs. all CRAN packages
# Code from http://josiahparry.com/post/cran-2019/
# To download the counts anew, run the code below; it may run for over an hour.
# Alternatively, download the .csv for this time period from my GitHub gist
# (see the link below).
library(rvest)
library(tidyverse)
# Scrape the text of every link inside a table on CRAN's "available packages
# by name" page; the result is a character vector used as the list of
# package names to query.
#
# read_html() replaces html_session(), which is deprecated as of rvest 1.0.0.
# A browsing session is unnecessary for a single page fetch, and read_html()
# works in both old and new rvest releases.
url <- "https://cran.r-project.org/web/packages/available_packages_by_name.html"
cran_packages <- read_html(url) %>%
  html_nodes("table a") %>%
  html_text()
#' Total CRAN downloads of a package over a date range
#'
#' Fetches daily download counts via `cranlogs::cran_downloads()` and sums
#' them per package.
#'
#' Note: despite the function name, the default window is not a full year; it
#' covers 2021-01-01 through 2021-03-01, the period this analysis was
#' originally run for. Pass `from` / `to` to query a different period.
#'
#' @param pkg Package name(s) passed on to `cranlogs::cran_downloads()`.
#' @param from Start date of the window, as a "YYYY-MM-DD" string.
#' @param to End date of the window, as a "YYYY-MM-DD" string.
#' @return A data frame with one row per package and the columns `package`
#'   and `total_downloads`.
get_year_downloads <- function(pkg, from = "2021-01-01", to = "2021-03-01") {
  cranlogs::cran_downloads(pkg, from = from, to = to) %>%
    group_by(package) %>%
    summarise(total_downloads = sum(count))
}
# Query the download total for every scraped package and row-bind the
# results into a single data frame. A package whose lookup errors
# contributes an empty tibble (i.e. no rows) instead of aborting the run.
# NOTE(review): no future::plan() is set anywhere in this script, so this
# presumably runs sequentially - confirm whether a parallel plan was intended.
total_cran_downloads_2020 <- furrr::future_map_dfr(
  .x = cran_packages,
  .f = purrr::possibly(get_year_downloads, otherwise = tibble::tibble())
)
# For the time period Jan-Feb 2021, I've posted the .csv here:
# https://gist.github.com/sfirke/7654da28c08db62fe70001eac4ecf0fe
# if already downloaded:
# Load the previously saved per-package totals and add two derived columns:
#   percentile - empirical CDF of total_downloads evaluated at each package's
#                own total (share of packages with a total <= this one)
#   rank       - 1 = most downloaded; ties share the smallest rank
# Despite the `_2020` suffix in the variable name, the file name indicates
# that the data covers Jan-Feb 2021.
total_cran_downloads_2020 <- read_csv("cran_downloads_jan_feb_2021.csv") %>%
  mutate(
    percentile = ecdf(total_downloads)(total_downloads),
    rank = min_rank(desc(total_downloads))
  )

# Show where janitor falls among all CRAN packages.
total_cran_downloads_2020 %>%
  filter(package == "janitor")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment