Skip to content

Instantly share code, notes, and snippets.

@matthewgthomas
Created April 24, 2015 16:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matthewgthomas/50c4cd26b672acd0813b to your computer and use it in GitHub Desktop.
Save matthewgthomas/50c4cd26b672acd0813b to your computer and use it in GitHub Desktop.
Code to create the graphs for my "evolution of cooperation isn't so puzzling" article: http://www.matthewgthomas.co.uk/my-research/the-evolution-of-cooperation-isnt-so-puzzling
##
## Code to create the graphs for my "evolution of cooperation isn't so puzzling" article: http://www.matthewgthomas.co.uk/my-research/the-evolution-of-cooperation-isnt-so-puzzling
## Data is available from http://matthewgthomas.co.uk/data/cooperation-puzzle.zip
##
## Written by Matthew Gwynfryn Thomas
## Date: 24 April 2015
##
## Copyright (c) 2015 Matthew Gwynfryn Thomas
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to deal
## in the Software without restriction, including without limitation the rights
## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
## ## copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in all
## copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
## SOFTWARE.
##
library(plyr)
library(reshape2)
library(ggplot2)
#install.packages("wesanderson")
library(wesanderson)
## for installing plotly
#install.packages("devtools")
#library("devtools")
#install_github("ropensci/plotly")
## for upgrading plotly
#devtools::install_github("ropensci/plotly")
library(plotly)
####################################################################
## Load Europe PMC data
##
# load articles from search term (evolution AND cooperation) -- multiple files
tsv_files = list.files("./europmc-all-papers/", pattern="*tsv", full.names=T) # find all .tsv files
europmc.all = do.call("rbind", lapply(tsv_files, function(x) read.csv(x, stringsAsFactors=F, sep="\t"))) # load into a single dataframe
# load articles from search term (evolution AND cooperation AND puzzl*) -- single file
europmc.puzzle = read.csv("europepmc - puzzle 1.tsv", sep="\t")
####################################################################
## Summarise counts by journal
##
## puzzle articles
europmc.puzzle.sum = ddply(europmc.puzzle, .(JOURNAL), summarise, count=length(JOURNAL))
## all articles
europmc.all.sum = ddply(europmc.all, .(JOURNAL), summarise, count=length(JOURNAL))
europmc = merge(europmc.puzzle.sum, europmc.all.sum, by="JOURNAL", all.x=T)
names(europmc) = c("journal", "puzzle", "all")
europmc$prop = europmc$puzzle / europmc$all # proportion of coop papers containing 'puzzl*'
europmc = europmc[ order(-europmc$all), ] # order by descending count of all coop papers
####################################################################
## Bar chart of top ten journals publishing coop articles
##
europmc.melt = melt(europmc[1:10,], id.vars="journal", measure.vars=c("puzzle", "all"),
variable.name="Type", value.name="Count") # reshape for stacked barchart
# reset factor and levels
europmc.melt$journal = as.character(europmc.melt$journal)
europmc.melt$journal = as.factor(europmc.melt$journal)
# reorder levels according to count so highest counts appear at the top of the graph
europmc.melt$journal = with(europmc.melt, reorder(factor(journal), Count, sum))
# bar chart of all articles stacked with 'puzzle' articles
(journals = ggplot(europmc.melt, aes(x=journal, y=Count, fill=Type)) +
geom_bar(stat="identity") +
coord_flip() +
xlab("") +
ylab("Number of cooperation articles, puzzling or not") +
theme_bw() +
scale_fill_manual(values = wes_palette("Zissou")) + # Cavalcanti
#eliminates baground, gridlines, and chart border
theme(
plot.background = element_blank()
,panel.grid.major = element_blank()
,panel.grid.minor = element_blank()
,panel.border = element_blank()
,panel.background = element_blank()
) + theme(axis.line = element_line(color = 'black')))
####################################################################
## Trends over time
##
## puzzle articles
europmc.puzzle.time = ddply(europmc.puzzle, .(PUBLICATION_YEAR), summarise, count=length(PUBLICATION_YEAR))
## all articles
europmc.all.time = ddply(europmc.all, .(PUBLICATION_YEAR), summarise, count=length(PUBLICATION_YEAR))
europmc.time = merge(europmc.puzzle.time, europmc.all.time, by="PUBLICATION_YEAR", all.x=T)
names(europmc.time) = c("year", "puzzle", "all")
europmc.time$prop = europmc.time$puzzle / europmc.time$all # proportion of coop papers containing 'puzzl*'
# when was cooperation least puzzling?
subset(europmc.time, prop == min(europmc.time$prop))
# 1976!
# who found it puzzling?
subset(europmc.puzzle, PUBLICATION_YEAR==1976)
# plot proportion of puzzle papers over time
(puzzle_over_time = ggplot(europmc.time, aes(x=year, y=prop)) +
geom_line(colour=wes_palette("GrandBudapest")[1], size=1.5) +
ylab("Occurrences of \"evolution\", \"cooperation\" and \"puzzle\"") +
xlab("Year") +
#scale_color_manual(values = wes_palette("GrandBudapest")) +
theme_bw() +
#eliminates baground, gridlines, and chart border
theme(
plot.background = element_blank()
,panel.grid.major = element_blank()
,panel.grid.minor = element_blank()
,panel.border = element_blank()
,panel.background = element_blank()
) + theme(axis.line = element_line(color = 'black')))
####################################################################
## Plot counts of puzzle/all papers over time
##
europmc.melt = melt(europmc.time, id.vars="year", measure.vars=c("puzzle", "all"),
variable.name="type", value.name="count") # reshape for stacked barchart
# plot proportion of puzzle papers over time
(trends = ggplot(europmc.melt, aes(x=year, y=count, colour=type)) +
geom_line(size=1.5) +
ylab("Occurrences of \"evolution\", \"cooperation\" and \"puzzle\"") +
xlab("Year") +
scale_color_manual(values = wes_palette("Royal1")) +
theme_bw() +
#eliminates baground, gridlines, and chart border
theme(
plot.background = element_blank()
,panel.grid.major = element_blank()
,panel.grid.minor = element_blank()
,panel.border = element_blank()
,panel.background = element_blank()
) + theme(axis.line = element_line(color = 'black')))
####################################################################
## Save plots to plotly
##
py <- plotly()
journals_py = py$ggplotly(journals)
journals_py$response$url # get url of plot
puzzle_over_time_py = py$ggplotly(puzzle_over_time)
puzzle_over_time_py$response$url # get url of plot
trends_py = py$ggplotly(trends)
trends_py$response$url # get url of plot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment