Skip to content

Instantly share code, notes, and snippets.

@eduardofox2
Last active October 15, 2019 13:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eduardofox2/ada0b8a546d398bb9d828f74cf6d9ae6 to your computer and use it in GitHub Desktop.
Save eduardofox2/ada0b8a546d398bb9d828f74cf6d9ae6 to your computer and use it in GitHub Desktop.
R code prepared for improving graphs & transparency of a colleague's scientometrics analysis, published at https://olivre.com.br/brasil-x-portugal-quem-tem-mais-impacto-em-ciencias-da-vida-e-saude [IN PORTUGUESE, by Marcelo Hermes de Lima]
##############Script R escrito por Eduardo G P Fox em 10/10/2019#########################
#Relativo a um artigo enfatizando o impacto da ciencia nacional medido por Citations Per Paper em 2017, relativo a outros paises relacionados
#Por favor enviem comentarios sobre este script para meu site pessoal (https://ofoxofox.wixsite.com/research) e/ou Twitter @ofoxofox;
#Pacotes R utilizados
library(ggplot2)
library(stringr)
library(plyr)
library(reshape)
library(tidyverse)
library(ggsci)
#Caso algum destes pacotes nao esteja instalado localmente, instale-o com install.packages() antes de rodar o script abaixo
#' Draw several plots together on a single page
#'
#' Adapted from the "Multiple plot function" published on the R Cookbook
#' website.
#'
#' @param ... ggplot objects to draw.
#' @param plotlist Optional list of further ggplot objects; they are appended
#'   after the objects passed in `...`.
#' @param file Not used by this function; kept in the signature so existing
#'   calls keep working.
#' @param cols Number of columns in the automatically generated layout.
#' @param layout Optional matrix specifying the layout. If present, `cols` is
#'   ignored. With `matrix(c(1, 2, 3, 3), nrow = 2, byrow = TRUE)`, plot 1 goes
#'   in the upper left, plot 2 in the upper right, and plot 3 spans the whole
#'   bottom row.
#' @return Invisibly, the value of `print()` when exactly one plot is supplied;
#'   otherwise `NULL`, invisibly. The function is called for its drawing side
#'   effect.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Make a single list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: return before touching the graphics device. Without this
  # guard the code below would index plots[[1]] on an empty list and fail.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # A single plot needs no layout
  if (num_plots == 1) {
    return(invisible(print(plots[[1]])))
  }

  # If layout is NULL, use 'cols' to derive a grid that holds every plot
  if (is.null(layout)) {
    rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * rows), ncol = cols, nrow = rows)
  }

  # Set up the page. grid functions are called through their namespace so the
  # caller's search path is not modified by a library() call.
  grid::grid.newpage()
  grid::pushViewport(
    grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
  )

  # Draw each plot in the layout cell(s) whose value equals its index
  for (i in seq_len(num_plots)) {
    cell <- as.data.frame(which(layout == i, arr.ind = TRUE))
    print(
      plots[[i]],
      vp = grid::viewport(
        layout.pos.row = cell$row,
        layout.pos.col = cell$col
      )
    )
  }
  invisible(NULL)
}
# Data retrieved by Prof. Marcelo Hermes de Lima from www.scimagojr.com
# ("Country Rank" tab), relating to Citations per Paper (CPP).
# Organised as a named list of two data frames, "score" and "rank", each
# holding one column per country and ten values per column.
data <- list(
  score = data.frame(
    Bras = c(41, 47, 54, 46, 58, 48.5, 44, 37, 38, 40),
    Port = c(81, 78, 81, 82, 72, 73, 36, 69, 56, 75),
    Chil = c(54, 60, 61, 56, 71, 64, 64, 53, 48, 53),
    Arge = c(50, 50, 58, 55, 66, 65, NA, 48, 40, 42),
    Colo = c(42, 46, 43, 46, 59, 59, 49.5, 80, 56, 27)
  ),
  rank = data.frame(
    Bras = c(1.3, 1.4, 2.8, 2.3, 2.4, 1.3, 3.7, 1.6, 0.7, 0.8),
    Port = c(6.9, 5.7, 7.6, 7.9, 4.6, 5.6, NA, 6.7, 3.3, 5.8),
    Chil = c(4.2, 3.6, 4.4, 4.5, 4.1, 3.8, 7.7, 4.7, 2.4, 2.6),
    Arge = c(2.7, 3, 3.4, 4, 3.4, 4, NA, NA, 1.3, 1.1),
    Colo = c(1.6, NA, 1, 2.6, NA, 3.1, NA, 8.2, 3.5, 0.3)
  )
)
# Percent-of-first-place values grouped by super-area, used to compare Brazil
# and Portugal. One data frame per super-area; the list names are the
# super-area labels.
data_compare <- setNames(
  list(
    data.frame(
      Brasil   = c(41, 47, 54, 46, 58),
      Portugal = c(81, 78, 81, 82, 72)
    ),
    data.frame(
      Brasil   = c(48.5, 44, 37, 38, 40),
      Portugal = c(73, 36, 69, 56, 75)
    ),
    data.frame(
      Brasil   = c(18, 27, 32, 31),
      Portugal = c(37, 54, 67, 60)
    )
  ),
  c("Vida", "Saúde", "Humanas")
)
# Flatten the list into one wide table (columns "score.<country>" and
# "rank.<country>") and label the rows with the study areas
data.df <- as.data.frame(data)
row.names(data.df) <- c(
  "Bio-Agro", "Bioquim", "Farmacol", "Imuno", "Neuro",
  "Medicina", "Odonto", "Enferm", "Psico", "Vet"
)

# Long format: X1 = measure.country, X2 = study area, value = observation.
# Only the "rank" rows are kept, and the country factor is reordered to
# follow the plotting aesthetics adopted by MHL.
rank.data <- data.df %>%
  t() %>%
  melt() %>%
  filter(str_detect(X1, "rank")) %>%
  mutate(
    X1 = factor(
      X1,
      levels = c("rank.Bras", "rank.Port", "rank.Arge", "rank.Chil", "rank.Colo")
    )
  )

# Same reshaping and reordering for the "score" rows
score.data <- data.df %>%
  t() %>%
  melt() %>%
  filter(str_detect(X1, "score")) %>%
  mutate(
    X1 = factor(
      X1,
      levels = c("score.Bras", "score.Port", "score.Arge", "score.Chil", "score.Colo")
    )
  )
# Descriptive statistics for every column of data.df (one row per
# measure.country combination), as they appeared in the original Excel file
# (useful later)
summary.data <- ddply(
  melt(data.df), c("variable"), summarise,
  min = min(value, na.rm = TRUE),
  max = max(value, na.rm = TRUE),
  mean = mean(value, na.rm = TRUE),
  sd = sd(value, na.rm = TRUE),
  # The standard error divides by the number of non-missing observations, to
  # match the NA-dropping sd above; length(value) would also count the NAs
  # and understate the error for columns with missing values.
  se = sd / sqrt(sum(!is.na(value)))
)

# Scatter plot of score (y) against rank (x), as in the original Excel file
plot(x = melt(data[2])$value, y = melt(data[1])$value)
# Charts that appear in the outreach article
# Grouped column chart: one cluster per country, one bar per study area
ggplot(score.data, aes(x = X1, y = value, fill = X2)) +
  # geom_col() draws bar heights straight from `value`
  geom_col(position = "dodge", width = 0.8) +
  scale_fill_jco() +
  scale_x_discrete(
    name = "\nPaíses",
    labels = c("Brasil", "Portugal", "Argentina", "Chile", "Colômbia"),
    expand = c(0.02, 0)
  ) +
  scale_y_continuous(
    name = NULL,
    limits = c(0, 100),
    breaks = c(0, 25, 50, 75, 100),
    expand = c(0, 0)
  ) +
  # labs() only sets the text of plot and aesthetic labels. The former font=
  # and size= arguments named labels that no layer uses, so they were
  # dropped; the title is styled through theme(plot.title = ...) below.
  labs(title = "Percentual do CPP do 1o Lugar", fill = "Área de Estudo") +
  theme_light() +
  theme(
    plot.title = element_text(color = "red", size = 20, face = "bold.italic"),
    axis.title.x = element_text(size = 20, face = "bold"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 11)
  )
# Boxplot of the score (% of the first-place CPP) per country
ggplot(score.data, aes(x = X1, y = value, fill = X1)) +
  # End caps for the whiskers
  stat_boxplot(geom = "errorbar", width = 0.15, lwd = 1.2) +
  # Outlier markers are switched off; presumably because the jitter layer
  # below already draws every observation
  geom_boxplot(width = 0.7, lwd = 1.2, fatten = 1, outlier.shape = NA) +
  geom_jitter(width = 0.1, alpha = 0.6) +
  scale_fill_jco() +
  scale_x_discrete(
    name = "Países",
    labels = c("Brasil", "Portugal", "Argentina", "Chile", "Colômbia"),
    expand = c(0.1, 0)
  ) +
  scale_y_continuous(
    name = NULL,
    limits = c(0, 100),
    breaks = c(0, 25, 50, 75, 100),
    expand = c(0, 0)
  ) +
  # Only the title is set here: font= and size= are not label names, and a
  # fill label is never shown because the legend is hidden below. The title
  # is styled through theme(plot.title = ...).
  labs(title = "% do CPP do 1o Lugar") +
  theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 20, face = "bold.italic"),
    axis.title.x = element_text(size = 20, face = "bold"),
    axis.title.y = element_text(size = 20, face = "bold"),
    axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12)
  )
# Boxplot of the rank score per country
ggplot(rank.data, aes(x = X1, y = value, fill = X1)) +
  # End caps for the whiskers
  stat_boxplot(geom = "errorbar", width = 0.15, lwd = 1.2) +
  # Outlier markers are switched off; presumably because the jitter layer
  # below already draws every observation
  geom_boxplot(width = 0.7, lwd = 1.2, fatten = 1, outlier.shape = NA) +
  geom_jitter(width = 0.1, alpha = 0.5) +
  scale_fill_jco() +
  scale_x_discrete(
    name = "Países",
    labels = c("Brasil", "Portugal", "Argentina", "Chile", "Colômbia"),
    expand = c(0.1, 0)
  ) +
  scale_y_continuous(
    name = NULL,
    limits = c(0, 10),
    breaks = c(0, 2.5, 5.0, 7.5, 10.0),
    expand = c(0, 0)
  ) +
  # Only the title is set here: font= and size= are not label names, and a
  # fill label is never shown because the legend is hidden below. The title
  # is styled through theme(plot.title = ...).
  labs(title = "Rank-Score: Vida e Saúde") +
  theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 20, face = "bold.italic"),
    axis.title.x = element_text(size = 20, face = "bold"),
    axis.title.y = element_text(size = 20, face = "bold"),
    axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12)
  )
# Supplementary boxplots comparing Brazil and Portugal (kept as separate
# objects so their usefulness can be judged individually)
# Preparation
# Life sciences ("Vida")
# The super-area is selected by name and melted on its own, instead of
# slicing rows 1:10 out of the melted full list; a fixed row range would
# silently pick the wrong rows if any super-area gained or lost a value.
Vida <- ggplot(
  melt(data_compare[["Vida"]]),
  aes(x = variable, y = value, fill = variable)
) +
  stat_boxplot(geom = "errorbar", width = 0.15, lwd = 1.2) +
  geom_boxplot(width = 0.7, lwd = 1.2, fatten = 1) +
  scale_fill_jco() +
  scale_x_discrete(
    name = "\nPaíses",
    labels = c("Brasil", "Portugal"),
    expand = c(0.1, 0)
  ) +
  scale_y_continuous(
    name = "Percentual do CPP do 1o Lugar",
    limits = c(0, 100),
    breaks = c(0, 25, 50, 75, 100),
    expand = c(0, 0)
  ) +
  # font= and size= are not label names for labs(); the title is styled
  # through theme(plot.title = ...) below.
  labs(title = "Vida") +
  theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
    # White x-axis title: presumably hides the text while keeping its space,
    # so this panel lines up with its neighbours
    axis.title.x = element_text(size = 20, face = "bold", color = "white"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12)
  )
# Health sciences ("Saúde")
# The super-area is selected by name and melted on its own, instead of
# slicing rows 11:20 out of the melted full list; a fixed row range would
# silently pick the wrong rows if any super-area gained or lost a value.
Saude <- ggplot(
  melt(data_compare[["Saúde"]]),
  aes(x = variable, y = value, fill = variable)
) +
  stat_boxplot(geom = "errorbar", width = 0.15, lwd = 1.2) +
  geom_boxplot(width = 0.7, lwd = 1.2, fatten = 1) +
  scale_fill_jco() +
  scale_x_discrete(
    name = "\nPaíses",
    labels = c("Brasil", "Portugal"),
    expand = c(0.1, 0)
  ) +
  scale_y_continuous(
    name = NULL,
    limits = c(0, 100),
    breaks = c(0, 25, 50, 75, 100),
    expand = c(0, 0)
  ) +
  # font= and size= are not label names for labs(); the title is styled
  # through theme(plot.title = ...) below.
  labs(title = "Saúde") +
  theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
    axis.title.x = element_text(size = 20, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12)
  )
# Humanities ("Humanas")
# The super-area is selected by name and melted on its own, instead of
# slicing rows 21:28 out of the melted full list; a fixed row range would
# silently pick the wrong rows if any super-area gained or lost a value.
Humanas <- ggplot(
  melt(data_compare[["Humanas"]]),
  aes(x = variable, y = value, fill = variable)
) +
  stat_boxplot(geom = "errorbar", width = 0.15, lwd = 1.2) +
  geom_boxplot(width = 0.7, lwd = 1.2, fatten = 1) +
  scale_fill_jco() +
  scale_x_discrete(
    name = "\nPaíses",
    labels = c("Brasil", "Portugal"),
    expand = c(0.1, 0)
  ) +
  scale_y_continuous(
    name = NULL,
    limits = c(0, 100),
    breaks = c(0, 25, 50, 75, 100),
    expand = c(0, 0)
  ) +
  # font= and size= are not label names for labs(); the title is styled
  # through theme(plot.title = ...) below.
  labs(title = "Humanas") +
  theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
    # White x-axis title: presumably hides the text while keeping its space,
    # so this panel lines up with its neighbours. The stray empty argument
    # (", ,") of the earlier call has been removed.
    axis.title.x = element_text(size = 20, face = "bold", color = "white"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12)
  )
# Draw the three comparison panels together, one per column
multiplot(plotlist = list(Vida, Saude, Humanas), cols = 3)
##########################################
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment