library(tidyverse)
library(jsonlite)
#>
#> Attaching package: 'jsonlite'
#> The following object is masked from 'package:purrr':
#>
#> flatten
# Read the JSON export from disk into a data frame; progress output is
# silenced with verbose = FALSE.
base_df <- jsonlite::stream_in(
  con = file("~/Downloads/base_dois.json"),
  verbose = FALSE
)
#> Warning in readLines(con, n = pagesize, encoding = "UTF-8"): incomplete
#> final line found on '~/Downloads/base_dois.json'
# Flatten the nested `response` element: keep the queried DOI (renamed to
# `doi_kb`, since the unnested `docs` contribute their own `doi` column) next
# to its `docs`, then expand `docs` into regular columns. keep_empty = TRUE
# keeps DOIs whose `docs` entry is empty, filling their columns with NA.
my_base <- as_tibble(base_df$response) %>%
  select(doi_kb = doi, docs) %>%
  unnest(cols = docs, keep_empty = TRUE)

# Print the flattened table for inspection.
my_base
#> # A tibble: 20,105 x 12
#> doi_kb oastatus titel collection reptype doi provider link type
#> <chr> <chr> <chr> <chr> <chr> <lis> <chr> <chr> <chr>
#> 1 10.10… unknown Tran… ftleedsun… Hochsc… <chr… White R… http… arti…
#> 2 10.10… open Tran… crelsevie… E-Jour… <chr… Science… http… arti…
#> 3 10.10… unknown "Tab… ftdatacite Forsch… <chr… DataCit… http… data…
#> 4 10.10… unknown "Tab… ftdatacite Forsch… <chr… DataCit… http… data…
#> 5 10.10… unknown "Tra… ftdatacite Forsch… <chr… DataCit… http… unkn…
#> 6 10.10… unknown "Tab… ftdatacite Forsch… <chr… DataCit… http… data…
#> 7 10.10… unknown "Tab… ftdatacite Forsch… <chr… DataCit… http… data…
#> 8 10.10… unknown Tran… ftunivrom… Hochsc… <chr… Sapienz… http… arti…
#> 9 10.10… unknown "Tra… fthalin2p3 Hochsc… <chr… HAL-IN2… http… arti…
#> 10 10.10… open Tran… ftcopenha… Hochsc… <chr… Univers… http… arti…
#> # … with 20,095 more rows, and 3 more variables: date <chr>,
#> # creator <chr>, rights <chr>
# Number of distinct queried DOIs (`doi_kb`) per `collection` value, largest
# first. Rows with a missing `collection` are counted under NA.
my_base %>%
  distinct(collection, doi_kb) %>%
  count(collection, sort = TRUE)
#> # A tibble: 621 x 2
#> collection n
#> <chr> <int>
#> 1 <NA> 3332
#> 2 ftpubmed 2759
#> 3 ftdoajarticles 1755
#> 4 ftarxivpreprints 866
#> 5 crelsevierbv 798
#> 6 crplos 453
#> 7 ftpubman 444
#> 8 fthighwire 344
#> 9 ftccsdartic 297
#> 10 ftdatacite 285
#> # … with 611 more rows
# Created on 2019-10-17 by the reprex package (v0.3.0)
# Session info ----
# Print the R version, platform settings and package versions in use, so the
# run above can be reproduced.
devtools::session_info()
#> ─ Session info ──────────────────────────────────────────────────────────
#> setting value
#> version R version 3.6.1 (2019-07-05)
#> os macOS Mojave 10.14.6
#> system x86_64, darwin15.6.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Europe/Berlin
#> date 2019-10-17
#>
#> ─ Packages ──────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0)
#> backports 1.1.5 2019-10-02 [1] CRAN (R 3.6.0)
#> broom 0.5.2 2019-04-07 [1] CRAN (R 3.6.0)
#> callr 3.3.1 2019-07-18 [1] CRAN (R 3.6.0)
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 3.6.0)
#> cli 1.1.0 2019-03-19 [1] CRAN (R 3.6.0)
#> colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.6.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 3.6.0)
#> devtools 2.1.0 2019-07-06 [1] CRAN (R 3.6.0)
#> digest 0.6.21 2019-09-20 [1] CRAN (R 3.6.0)
#> dplyr * 0.8.3 2019-07-04 [1] CRAN (R 3.6.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0)
#> fansi 0.4.0 2018-10-05 [1] CRAN (R 3.6.0)
#> forcats * 0.4.0 2019-02-17 [1] CRAN (R 3.6.0)
#> fs 1.3.1 2019-05-06 [1] CRAN (R 3.6.0)
#> generics 0.0.2 2018-11-29 [1] CRAN (R 3.6.0)
#> ggplot2 * 3.2.1 2019-08-10 [1] CRAN (R 3.6.0)
#> glue 1.3.1 2019-03-12 [1] CRAN (R 3.6.0)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.0)
#> haven 2.1.1 2019-07-04 [1] CRAN (R 3.6.0)
#> highr 0.8 2019-03-20 [1] CRAN (R 3.6.0)
#> hms 0.5.1 2019-08-23 [1] CRAN (R 3.6.0)
#> htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.0)
#> httr 1.4.1 2019-08-05 [1] CRAN (R 3.6.0)
#> jsonlite * 1.6 2018-12-07 [1] CRAN (R 3.6.0)
#> knitr 1.24 2019-08-08 [1] CRAN (R 3.6.1)
#> lattice 0.20-38 2018-11-04 [1] CRAN (R 3.6.1)
#> lazyeval 0.2.2 2019-03-15 [1] CRAN (R 3.6.0)
#> lifecycle 0.1.0 2019-08-01 [1] CRAN (R 3.6.0)
#> lubridate 1.7.4 2018-04-11 [1] CRAN (R 3.6.0)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 3.6.0)
#> modelr 0.1.5 2019-08-08 [1] CRAN (R 3.6.0)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 3.6.0)
#> nlme 3.1-140 2019-05-12 [1] CRAN (R 3.6.1)
#> pillar 1.4.2 2019-06-29 [1] CRAN (R 3.6.0)
#> pkgbuild 1.0.5 2019-08-26 [1] CRAN (R 3.6.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 3.6.0)
#> pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.6.0)
#> prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.6.0)
#> processx 3.4.1 2019-07-18 [1] CRAN (R 3.6.0)
#> ps 1.3.0 2018-12-21 [1] CRAN (R 3.6.0)
#> purrr * 0.3.2 2019-03-15 [1] CRAN (R 3.6.0)
#> R6 2.4.0 2019-02-14 [1] CRAN (R 3.6.0)
#> Rcpp 1.0.2 2019-07-25 [1] CRAN (R 3.6.0)
#> readr * 1.3.1 2018-12-21 [1] CRAN (R 3.6.0)
#> readxl 1.3.1 2019-03-13 [1] CRAN (R 3.6.0)
#> remotes 2.1.0 2019-06-24 [1] CRAN (R 3.6.0)
#> rlang 0.4.0 2019-06-25 [1] CRAN (R 3.6.0)
#> rmarkdown 1.15 2019-08-21 [1] CRAN (R 3.6.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.6.0)
#> rvest 0.3.4 2019-05-15 [1] CRAN (R 3.6.0)
#> scales 1.0.0 2018-08-09 [1] CRAN (R 3.6.0)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0)
#> stringi 1.4.3 2019-03-12 [1] CRAN (R 3.6.0)
#> stringr * 1.4.0 2019-02-10 [1] CRAN (R 3.6.0)
#> testthat 2.2.1 2019-07-25 [1] CRAN (R 3.6.0)
#> tibble * 2.1.3 2019-06-06 [1] CRAN (R 3.6.0)
#> tidyr * 1.0.0 2019-09-11 [1] CRAN (R 3.6.0)
#> tidyselect 0.2.5 2018-10-11 [1] CRAN (R 3.6.0)
#> tidyverse * 1.2.1 2017-11-14 [1] CRAN (R 3.6.0)
#> usethis 1.5.1 2019-07-04 [1] CRAN (R 3.6.0)
#> utf8 1.1.4 2018-05-24 [1] CRAN (R 3.6.0)
#> vctrs 0.2.0 2019-07-05 [1] CRAN (R 3.6.0)
#> withr 2.1.2 2018-03-15 [1] CRAN (R 3.6.0)
#> xfun 0.9 2019-08-21 [1] CRAN (R 3.6.0)
#> xml2 1.2.2 2019-08-09 [1] CRAN (R 3.6.0)
#> yaml 2.2.0 2018-07-25 [1] CRAN (R 3.6.0)
#> zeallot 0.1.0 2018-01-28 [1] CRAN (R 3.6.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library