Skip to content

Instantly share code, notes, and snippets.

@njahn82
Last active April 6, 2020 16:12
Show Gist options
  • Save njahn82/b598376b2affe47681b61e273e041b55 to your computer and use it in GitHub Desktop.
Save njahn82/b598376b2affe47681b61e273e041b55 to your computer and use it in GitHub Desktop.
library(europepmc)
library(tidyverse)
library(cowplot)
#> 
#> ********************************************************
#> Note: As of version 1.0.0, cowplot does not change the
#>   default ggplot2 theme anymore. To recover the previous
#>   behavior, execute:
#>   theme_set(theme_cowplot())
#> ********************************************************
library(colorblindr)
#> Loading required package: colorspace
# Get data from Europe PMC: every preprint record (SRC:PPR) that mentions
# COVID-19 or SARS-CoV-2.
# `output = "raw"` is passed by name (it was previously matched by position
# after a named argument); the record-level field extraction further down
# indexes into the raw records by field name.
# `limit` is set far above the number of hits so the result is not truncated.
epmc_data <- europepmc::epmc_search(
  query = '("COVID-19" OR "SARS-CoV-2") AND (SRC:PPR)',
  output = "raw",
  limit = 10000
)
#> 1392 records found, returning 1392
# Select data variables and transform them.
#
# Step 1: pull the needed fields out of each raw record. The tibble is built
# directly from `epmc_data` instead of piping into `tibble()`: with `%>%`, a
# `.` that only appears inside nested calls still causes the left-hand side
# to be inserted as the first argument, which silently added the whole raw
# list as an extra list-column. `.default = NA_character_` keeps one record
# with a missing field from aborting the entire extraction.
preprint_records <- tibble::tibble(
  # nested lookup: record$bookOrReportDetails$publisher
  preprint_source = purrr::map_chr(
    epmc_data,
    list("bookOrReportDetails", "publisher"),
    .default = NA_character_
  ),
  date = purrr::map_chr(
    epmc_data,
    "firstPublicationDate",
    .default = NA_character_
  ),
  doi = purrr::map_chr(epmc_data, "doi", .default = NA_character_)
)

# Step 2: count preprints per day and source, then accumulate per source.
preprint_df <- preprint_records %>%
  mutate(
    date = lubridate::ymd(date),
    # order factor levels by frequency so the most common source comes first
    preprint_source = fct_infreq(preprint_source)
  ) %>%
  # one row per (date, source) with the number of records in column `n`;
  # rows come back ordered by date, which the running total below relies on
  count(date, preprint_source) %>%
  # the result intentionally stays grouped by source so the cumulative sum
  # restarts for every source
  group_by(preprint_source) %>%
  mutate(cumsum_n = cumsum(n))
preprint_df
#> # A tibble: 200 x 4
#> # Groups:   preprint_source [8]
#>    date       preprint_source     n cumsum_n
#>    <date>     <fct>           <int>    <int>
#>  1 2020-01-28 medRxiv             1        1
#>  2 2020-01-30 medRxiv             1        2
#>  3 2020-01-30 bioRxiv             1        1
#>  4 2020-02-02 medRxiv             4        6
#>  5 2020-02-03 bioRxiv             1        2
#>  6 2020-02-04 bioRxiv             2        4
#>  7 2020-02-05 medRxiv             2        8
#>  8 2020-02-06 medRxiv             1        9
#>  9 2020-02-11 medRxiv             4       13
#> 10 2020-02-11 bioRxiv             5        9
#> # … with 190 more rows
# Plot the cumulative number of preprints over time, one coloured line per
# preprint source.
plot <- ggplot(
  preprint_df,
  # each aesthetic is mapped exactly once, by name (a stray duplicate
  # positional `cumsum_n` was previously passed as an unnamed aesthetic)
  aes(
    x = date,
    y = cumsum_n,
    group = preprint_source,
    colour = preprint_source
  )
) +
  # geom_line() already uses stat = "identity", so it is not repeated here
  geom_line(size = 1.1) +
  # legend title "Source"; colours taken from the Okabe-Ito palette
  scale_color_manual(name = "Source", values = colorblindr::palette_OkabeIto) +
  theme_minimal_grid() +
  labs(
    y = NULL,
    x = NULL,
    title = "COVID-19 Preprints in Europe PMC",
    subtitle = "Cumulative daily growth by source"
  ) +
  # align the title with the whole plot area rather than the panel
  theme(plot.title.position = "plot")
plot

# Write the figure to a PNG in the working directory. No width/height is
# given, so ggsave() chooses the image size itself.
ggsave(filename = "epmc_growth.png", plot = plot)
#> Saving 7 x 5 in image

Created on 2020-04-06 by the reprex package (v0.3.0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment