Last active
October 20, 2016 15:55
-
-
Save dhimmel/b1fc378408d830384b47184a8258d5b7 to your computer and use it in GitHub Desktop.
PubMed growth over time
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
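"# Growth of the PubMed corpus over time\n", | |
"\n", | |
"Plots the cumulative number of PubMed journal articles by year since 1960 and saves the figure as SVG and PNG." | |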
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"library(magrittr)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Parsed with column specification:\n", | |
"cols(\n", | |
" year = col_integer(),\n", | |
" n_journals = col_integer(),\n", | |
" n_articles = col_integer()\n", | |
")\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<table>\n", | |
"<thead><tr><th scope=col>year</th><th scope=col>n_journals</th><th scope=col>n_articles</th></tr></thead>\n", | |
"<tbody>\n", | |
"\t<tr><td>1960 </td><td>1925 </td><td>111955</td></tr>\n", | |
"\t<tr><td>1961 </td><td>2440 </td><td>119933</td></tr>\n", | |
"</tbody>\n", | |
"</table>\n" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|lll}\n", | |
" year & n\\_journals & n\\_articles\\\\\n", | |
"\\hline\n", | |
"\t 1960 & 1925 & 111955\\\\\n", | |
"\t 1961 & 2440 & 119933\\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
" year n_journals n_articles\n", | |
"1 1960 1925 111955 \n", | |
"2 1961 2440 119933 " | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# Yearly PubMed totals; the URL references a specific commit hash of dhimmel/delays.\n", | |
"url <- 'https://github.com/dhimmel/delays/raw/2d05dbaf2d8eaf50c35533261ba4c29b70c350a8/data/yearly-pubmed-totals.tsv'\n", | |
"pubmed_df <- readr::read_tsv(file = url)\n", | |
"\n", | |
"# Preview the first two rows.\n", | |
"head(pubmed_df, n = 2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Running total of articles (in millions) by year, drawn as a line with point markers.\n", | |
"gg <- dplyr::mutate(pubmed_df, cum_articles = cumsum(n_articles) / 1e6) %>%\n", | |
"  ggplot2::ggplot(mapping = ggplot2::aes(x = year, y = cum_articles)) +\n", | |
"  ggplot2::geom_line(color = '#4c814c') +\n", | |
"  ggplot2::geom_point(color = '#004c00', size = 1) +\n", | |
"  ggplot2::scale_x_continuous(breaks = seq(from = 1960, to = 2020, by = 10)) +\n", | |
"  ggplot2::xlab(NULL) +\n", | |
"  ggplot2::ylab('Total journal articles in PubMed\\nsince 1960 (millions)') +\n", | |
"  hetior::theme_dhimmel() +\n", | |
"  # Blank out the panel and make the plot background transparent.\n", | |
"  ggplot2::theme(\n", | |
"    panel.background = ggplot2::element_blank(),\n", | |
"    plot.background = ggplot2::element_rect(fill = 'transparent', color = NA)\n", | |
"  )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Write the figure to disk twice: once as SVG, once as a 300 dpi PNG.\n", | |
"# Both use width 5, height 3.5 and a transparent background.\n", | |
"ggplot2::ggsave(\n", | |
"  filename = 'pubmed-over-time.svg', plot = gg,\n", | |
"  width = 5, height = 3.5, bg = 'transparent'\n", | |
")\n", | |
"ggplot2::ggsave(\n", | |
"  filename = 'pubmed-over-time.png', plot = gg,\n", | |
"  width = 5, height = 3.5, dpi = 300, bg = 'transparent'\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Session Info for reproducibility" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"R version 3.3.1 (2016-06-21)\n", | |
"Platform: x86_64-pc-linux-gnu (64-bit)\n", | |
"Running under: Ubuntu 16.04.1 LTS\n", | |
"\n", | |
"locale:\n", | |
" [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C \n", | |
" [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 \n", | |
" [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 \n", | |
" [7] LC_PAPER=en_US.UTF-8 LC_NAME=C \n", | |
" [9] LC_ADDRESS=C LC_TELEPHONE=C \n", | |
"[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C \n", | |
"\n", | |
"attached base packages:\n", | |
"[1] stats graphics grDevices utils datasets methods base \n", | |
"\n", | |
"other attached packages:\n", | |
"[1] gdtools_0.0.7 magrittr_1.5 \n", | |
"\n", | |
"loaded via a namespace (and not attached):\n", | |
" [1] Rcpp_0.12.6 munsell_0.4.3 uuid_0.1-2 colorspace_1.2-6 \n", | |
" [5] R6_2.1.2 stringr_1.0.0 plyr_1.8.4 dplyr_0.5.0 \n", | |
" [9] tools_3.3.1 grid_3.3.1 gtable_0.2.0 DBI_0.4-1 \n", | |
"[13] lazyeval_0.2.0 assertthat_0.1 digest_0.6.10 tibble_1.1 \n", | |
"[17] IRdisplay_0.4.9000 readr_1.0.0 ggplot2_2.1.0 repr_0.9 \n", | |
"[21] hetior_0.0.0.9000 IRkernel_0.6 curl_1.1 evaluate_0.9 \n", | |
"[25] labeling_0.3 pbdZMQ_0.2-3 stringi_1.1.1 scales_0.4.0 \n", | |
"[29] svglite_1.1.0 jsonlite_1.0 " | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"sessionInfo()" | |
] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "R", | |
"language": "R", | |
"name": "ir" | |
}, | |
"language_info": { | |
"codemirror_mode": "r", | |
"file_extension": ".r", | |
"mimetype": "text/x-r-source", | |
"name": "R", | |
"pygments_lexer": "r", | |
"version": "3.3.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment