library(europepmc)
library(tidyverse)
library(cowplot)
#>
#> ********************************************************
#> Note: As of version 1.0.0, cowplot does not change the
#> default ggplot2 theme anymore. To recover the previous
#> behavior, execute:
#> theme_set(theme_cowplot())
#> ********************************************************
library(colorblindr)
#> Loading required package: colorspace
# Query Europe PMC for COVID-19 / SARS-CoV-2 records restricted to SRC:PPR,
# asking for the "raw" output format and allowing up to 10,000 hits.
epmc_data <- europepmc::epmc_search(
  '("COVID-19" OR "SARS-CoV-2") AND (SRC:PPR)',
  output = "raw",
  limit = 10000
)
#> 1392 records found, returning 1392
# Reshape the raw Europe PMC records into daily and cumulative record counts
# per preprint source.
#
# The tibble is built with an explicit call instead of piping `epmc_data`
# into `tibble()`: magrittr still injects the left-hand side as the first
# argument when `.` appears only inside nested calls, which would add the
# whole raw list as an unintended list-column.
preprint_df <-
  tibble::tibble(
    # publisher is nested one level down, inside `bookOrReportDetails`
    preprint_source = purrr::map_chr(
      epmc_data,
      list("bookOrReportDetails", "publisher")
    ),
    date = purrr::map_chr(epmc_data, "firstPublicationDate")
  ) %>%
  mutate(
    date = lubridate::ymd(date),
    # order the factor levels by how often each source occurs
    preprint_source = fct_infreq(preprint_source)
  ) %>%
  # one row per (date, source); `n` is the number of records that day
  count(date, preprint_source) %>%
  # make the chronological order explicit: cumsum() depends on it
  arrange(date, preprint_source) %>%
  group_by(preprint_source) %>%
  mutate(cumsum_n = cumsum(n)) %>%
  # hand back an ungrouped tibble so later verbs are not silently per-source
  ungroup()
preprint_df
#> # A tibble: 200 x 4
#> date preprint_source n cumsum_n
#> <date> <fct> <int> <int>
#> 1 2020-01-28 medRxiv 1 1
#> 2 2020-01-30 medRxiv 1 2
#> 3 2020-01-30 bioRxiv 1 1
#> 4 2020-02-02 medRxiv 4 6
#> 5 2020-02-03 bioRxiv 1 2
#> 6 2020-02-04 bioRxiv 2 4
#> 7 2020-02-05 medRxiv 2 8
#> 8 2020-02-06 medRxiv 1 9
#> 9 2020-02-11 medRxiv 4 13
#> 10 2020-02-11 bioRxiv 5 9
#> # … with 190 more rows
# Line chart of the cumulative record count over time, with one line and one
# colour per preprint source, saved to "epmc_growth.png".
plot <- ggplot(
  preprint_df,
  # x = date, y = running total; group and colour both follow the source
  aes(date, cumsum_n, group = preprint_source, colour = preprint_source)
) +
  geom_line(size = 1.1) +
  scale_color_manual("Source", values = colorblindr::palette_OkabeIto) +
  theme_minimal_grid() +
  labs(
    # axis titles are suppressed; the title and subtitle carry the meaning
    x = NULL,
    y = NULL,
    title = "COVID-19 Preprints in Europe PMC",
    subtitle = "Cumulative daily growth by source"
  ) +
  theme(plot.title.position = "plot")
plot
# No width/height is passed, so the image size is chosen by ggsave() and
# reported in the message below.
ggsave("epmc_growth.png", plot)
#> Saving 7 x 5 in image
# Created on 2020-04-06 by the reprex package (v0.3.0)