Install
install.packages(c("dplyr", "ggplot2", "rcrossref"))
Load
library("rcrossref")
library("dplyr")
library("ggplot2")
Get total number of members
tot <- cr_members(limit = 1)
tot$meta$total_results
#> [1] 7622
res <- lapply(seq(1, tot$meta$total_results, 500), function(x) {
cr_members(limit = 500, offset = x)
})
(res_df <- rbind_all(lapply(res, "[[", "data")))
#> # A tibble: 7,621 × 48
#> id primary_name
#> <int> <chr>
#> 1 324 Ebsco Publishing
#> 2 10 American Medical Association (AMA)
#> 3 311 Wiley-Blackwell
#> 4 78 Elsevier BV
#> 5 297 Springer Nature
#> 6 339 Springer Nature
#> 7 56 Cambridge University Press (CUP)
#> 8 316 American Chemical Society (ACS)
#> 9 13 American Geophysical Union (AGU)
#> 10 15 American Psychological Association (APA)
#> # ... with 7,611 more rows, and 46 more variables: location <chr>,
#> # last_status_check_time <date>, total.dois <chr>, current.dois <chr>,
#> # backfile.dois <chr>, prefixes <chr>,
#> # coverge.affiliations.current <chr>, coverge.funders.backfile <chr>,
#> # coverge.licenses.backfile <chr>, coverge.funders.current <chr>,
#> # coverge.affiliations.backfile <chr>,
#> # coverge.resource.links.backfile <chr>, coverge.orcids.backfile <chr>,
#> # coverge.update.policies.current <chr>, coverge.orcids.current <chr>,
#> # coverge.references.backfile <chr>,
#> # coverge.award.numbers.backfile <chr>,
#> # coverge.update.policies.backfile <chr>,
#> # coverge.licenses.current <chr>, coverge.award.numbers.current <chr>,
#> # coverge.abstracts.backfile <chr>,
#> # coverge.resource.links.current <chr>, coverge.abstracts.current <chr>,
#> # coverge.references.current <chr>,
#> # flags.deposits.abstracts.current <chr>,
#> # flags.deposits.orcids.current <chr>, flags.deposits <chr>,
#> # flags.deposits.affiliations.backfile <chr>,
#> # flags.deposits.update.policies.backfile <chr>,
#> # flags.deposits.award.numbers.current <chr>,
#> # flags.deposits.resource.links.current <chr>,
#> # flags.deposits.articles <chr>,
#> # flags.deposits.affiliations.current <chr>,
#> # flags.deposits.funders.current <chr>,
#> # flags.deposits.references.backfile <chr>,
#> # flags.deposits.abstracts.backfile <chr>,
#> # flags.deposits.licenses.backfile <chr>,
#> # flags.deposits.award.numbers.backfile <chr>,
#> # flags.deposits.references.current <chr>,
#> # flags.deposits.resource.links.backfile <chr>,
#> # flags.deposits.orcids.backfile <chr>,
#> # flags.deposits.funders.backfile <chr>,
#> # flags.deposits.update.policies.current <chr>,
#> # flags.deposits.licenses.current <chr>, names <chr>, tokens <chr>
Here's the IDs of all members in the id
column, all 7621 of them
df <- res_df %>%
select(id, primary_name, total.dois, coverage = coverge.resource.links.current)
df$coverage <- as.numeric(df$coverage)
df$total.dois <- as.numeric(df$total.dois)
wowsers, lots of publishers have no data
ggplot(df, aes(x = coverage)) +
geom_histogram(binwidth = 0.01) +
theme_grey(base_size = 18)
Remove zeros
the publishsers with non-zero coverage values
dfnozeros <- df %>% filter(coverage > 0)
ggplot(dfnozeros, aes(x = coverage)) +
geom_histogram(binwidth = 0.03) +
theme_grey(base_size = 18)
Who's doing the best? Well, 202 have coverage of 100%, so let's pick the ones with high coverage and the most DOIs:
df_high <- dfnozeros %>%
filter(coverage == 1) %>%
arrange(desc(total.dois))
knitr::kable(df_high[1:10,])
id | primary_name | total.dois | coverage |
---|---|---|---|
16 | American Physical Society (APS) | 601309 | 1 |
98 | Hindawi Limited | 156112 | 1 |
1747 | American Dairy Science Association | 31144 | 1 |
5967 | Wydawnictwo SIGMA-NOT, sp. z.o.o. | 6078 | 1 |
3614 | Polish Botanical Society | 5014 | 1 |
4613 | American Institute of Ultrasound in Medicine | 4321 | 1 |
3500 | Brazilian Society for Computational and Applied Mathematics (SBMAC) | 2133 | 1 |
4931 | Fundacao Getulio Vargas | 2103 | 1 |
2359 | The Econometric Society | 2005 | 1 |
4024 | Lithuanian Academy of Sciences | 1890 | 1 |
What about some of the big, well-known publishers + cool open access publishers (subjective, I know)?
pubs <- c("Elsevier", "Springer Publishing Company", "Wiley", "PLOS", "elife", "peerj")
pubids <- vapply(pubs, function(z) cr_members(query = z)$data$id, numeric(1))
res <- df %>%
filter(id %in% pubids) %>%
arrange(desc(coverage))
knitr::kable(res)
id | primary_name | total.dois | coverage |
---|---|---|---|
78 | Elsevier BV | 14925065 | 0.9997314 |
311 | Wiley-Blackwell | 8147672 | 0.9288410 |
4443 | PeerJ | 3057 | 0.7816994 |
4374 | eLife Sciences Organisation, Ltd. | 3433 | 0.2767241 |
340 | Public Library of Science (PLoS) | 201493 | 0.0000000 |
793 | Springer Publishing Company | 7727 | 0.0000000 |
We have some work to do, eh?