Last active
October 15, 2019 13:18
-
-
Save eduardofox2/ada0b8a546d398bb9d828f74cf6d9ae6 to your computer and use it in GitHub Desktop.
R code prepared for improving graphs & transparency of a colleague's scientometrics analysis, published at https://olivre.com.br/brasil-x-portugal-quem-tem-mais-impacto-em-ciencias-da-vida-e-saude [IN PORTUGUESE, by Marcelo Hermes de Lima]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##############Script R escrito por Eduardo G P Fox em 10/10/2019######################### | |
#Relativo a um artigo enfatizando o impacto da ciencia nacional medido por Citations Per Paper em 2017, relativo a outros paises relacionados | |
#Por favor enviem comentarios sobre este script para meu site pessoal (https://ofoxofox.wixsite.com/research) e/ou Twitter @ofoxofox; | |
#Pacotes R utilizados | |
library(ggplot2) | |
library(stringr) | |
library(plyr) | |
library(reshape) | |
library(tidyverse) | |
library(ggsci) | |
#Caso faltar no GUI local, utilizar install.packages() para rodar o script abaixo | |
# Helper for drawing several plots together on one page.
# Multiple plot function obtained from the R Cookbook website.
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot
# objects).
# - cols:   Number of columns in the layout.
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
# - file:   Accepted for compatibility with the original recipe; it is never
#           read by this function.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# With no plots at all the function returns invisible NULL without drawing.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Merge the plots given through ... with those given through plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: bail out instead of indexing into an empty list
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, derive it from 'cols': cells are numbered column-wise
  # and the number of rows is whatever is needed to hold every plot
  if (is.null(layout)) {
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page; grid functions are namespace-qualified so that calling
    # this function does not attach 'grid' to the search path
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )

    # Draw each plot in the layout cell(s) whose value equals its index
    for (i in seq_len(num_plots)) {
      cell <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = grid::viewport(
        layout.pos.row = cell$row,
        layout.pos.col = cell$col
      ))
    }
  }
}
# Data retrieved by Prof. Marcelo Hermes de Lima from www.scimagojr.com/
# ("Country Rank" tab), relative to Citations per Paper (CPP).
# Organised as a named list of two data frames ("score" and "rank"), each with
# one column per country and one row per study area (the row order matches the
# row names assigned to data.df further down). NA marks a missing value.
# General data
data <- list(
  score = data.frame(
    Bras = c(41, 47, 54, 46, 58, 48.5, 44, 37, 38, 40),
    Port = c(81, 78, 81, 82, 72, 73, 36, 69, 56, 75),
    Chil = c(54, 60, 61, 56, 71, 64, 64, 53, 48, 53),
    Arge = c(50, 50, 58, 55, 66, 65, NA, 48, 40, 42),
    Colo = c(42, 46, 43, 46, 59, 59, 49.5, 80, 56, 27)
  ),
  rank = data.frame(
    Bras = c(1.3, 1.4, 2.8, 2.3, 2.4, 1.3, 3.7, 1.6, 0.7, 0.8),
    Port = c(6.9, 5.7, 7.6, 7.9, 4.6, 5.6, NA, 6.7, 3.3, 5.8),
    Chil = c(4.2, 3.6, 4.4, 4.5, 4.1, 3.8, 7.7, 4.7, 2.4, 2.6),
    Arge = c(2.7, 3, 3.4, 4, 3.4, 4, NA, NA, 1.3, 1.1),
    Colo = c(1.6, NA, 1, 2.6, NA, 3.1, NA, 8.2, 3.5, 0.3)
  )
)
# Percentages of the 1st place's CPP grouped by super-area, for comparing
# Brazil and Portugal only. One data frame per super-area, one column per
# country.
data_compare <- list(
  "Vida" = data.frame(
    Brasil = c(41, 47, 54, 46, 58),
    Portugal = c(81, 78, 81, 82, 72)
  ),
  "Saúde" = data.frame(
    Brasil = c(48.5, 44, 37, 38, 40),
    Portugal = c(73, 36, 69, 56, 75)
  ),
  "Humanas" = data.frame(
    Brasil = c(18, 27, 32, 31),
    Portugal = c(37, 54, 67, 60)
  )
)
# Flatten the list into one table (columns become "<metric>.<country>") and
# label the rows with the study areas
data.df <- as.data.frame(data)
rownames(data.df) <- c("Bio-Agro", "Bioquim", "Farmacol", "Imuno", "Neuro", "Medicina", "Odonto", "Enferm", "Psico", "Vet")

# Long format of the transposed table, split by metric: X1 holds the
# "<metric>.<country>" key, X2 the study area, value the measurement.
# X1 is re-levelled to follow the plotting order adopted by MHL.
rank.data <- melt(t(data.df)) %>%
  filter(str_detect(X1, "rank")) %>%
  mutate(X1 = factor(X1, levels = c("rank.Bras", "rank.Port", "rank.Arge", "rank.Chil", "rank.Colo")))

score.data <- melt(t(data.df)) %>%
  filter(str_detect(X1, "score")) %>%
  mutate(X1 = factor(X1, levels = c("score.Bras", "score.Port", "score.Arge", "score.Chil", "score.Colo")))
# Descriptive statistics per country/metric column, as in the original Excel
# file (useful later). Missing values are ignored throughout; the standard
# error therefore divides by the number of NON-missing observations, not by
# the full column length, so it stays consistent with the NA-free sd.
summary.data <- ddply(
  melt(data.df), c("variable"), summarise,
  min = min(value, na.rm = TRUE),
  max = max(value, na.rm = TRUE),
  mean = mean(value, na.rm = TRUE),
  sd = sd(value, na.rm = TRUE),
  # 'sd' here is the column computed on the previous line
  se = sd / sqrt(sum(!is.na(value)))
)

# Scatter plot (rank on x, score on y) as in the original Excel file.
# The call is kept verbatim because the default axis labels are derived from
# the argument expressions.
plot(melt(data[2])$value, melt(data[1])$value)
# Plots that appear in the outreach article
# Grouped column chart: one cluster of bars per country, one bar per study
# area, showing the score values.
ggplot(score.data, aes(fill = X2, y = value, x = X1)) +
  # geom_col() draws bars at the given heights (same as stat = "identity")
  geom_col(position = "dodge", width = 0.8) +
  scale_fill_jco() +
  # Labels are positional: they follow the factor level order of X1
  scale_x_discrete(
    name = "\nPaíses",
    labels = c("Brasil", "Portugal", "Argentina", "Chile", "Colômbia"),
    expand = c(0.02, 0)
  ) +
  scale_y_continuous(
    name = NULL, limits = c(0, 100),
    breaks = c(0, 25, 50, 75, 100), expand = c(0, 0)
  ) +
  # Title styling belongs in theme(); labs() only sets label text
  labs(title = "Percentual do CPP do 1o Lugar", fill = "Área de Estudo") +
  theme_light() +
  theme(
    plot.title = element_text(color = "red", size = 20, face = "bold.italic"),
    axis.title.x = element_text(size = 20, face = "bold"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 11)
  )
# Boxplot of the score values per country, with the individual observations
# overlaid as semi-transparent points.
ggplot(score.data, aes(fill = X1, y = value, x = X1)) +
  # Whisker end caps
  stat_boxplot(geom = "errorbar", width = 0.15, lwd = 1.2) +
  # Outliers are hidden here because every observation is drawn by the
  # jitter layer below
  geom_boxplot(width = 0.7, lwd = 1.2, fatten = 1, outlier.shape = NA) +
  # Jitter horizontally only: height = 0 keeps each point at its true value
  geom_jitter(width = 0.1, height = 0, alpha = 0.6) +
  scale_fill_jco() +
  # Labels are positional: they follow the factor level order of X1
  scale_x_discrete(
    name = "Países",
    labels = c("Brasil", "Portugal", "Argentina", "Chile", "Colômbia"),
    expand = c(0.1, 0)
  ) +
  scale_y_continuous(
    name = NULL, limits = c(0, 100),
    breaks = c(0, 25, 50, 75, 100), expand = c(0, 0)
  ) +
  # Title styling belongs in theme(); labs() only sets label text
  labs(title = "% do CPP do 1o Lugar", fill = "Area de Estudo") +
  theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 20, face = "bold.italic"),
    axis.title.x = element_text(size = 20, face = "bold"),
    axis.title.y = element_text(size = 20, face = "bold"),
    axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12)
  )
# Boxplot of the rank values per country, with the individual observations
# overlaid as semi-transparent points.
ggplot(rank.data, aes(fill = X1, y = value, x = X1)) +
  # Whisker end caps
  stat_boxplot(geom = "errorbar", width = 0.15, lwd = 1.2) +
  # Outliers are hidden here because every observation is drawn by the
  # jitter layer below
  geom_boxplot(width = 0.7, lwd = 1.2, fatten = 1, outlier.shape = NA) +
  # Jitter horizontally only: height = 0 keeps each point at its true value
  geom_jitter(width = 0.1, height = 0, alpha = 0.5) +
  scale_fill_jco() +
  # Labels are positional: they follow the factor level order of X1
  scale_x_discrete(
    name = "Países",
    labels = c("Brasil", "Portugal", "Argentina", "Chile", "Colômbia"),
    expand = c(0.1, 0)
  ) +
  scale_y_continuous(
    name = NULL, limits = c(0, 10),
    breaks = c(0, 2.5, 5.0, 7.5, 10.0), expand = c(0, 0)
  ) +
  # Title styling belongs in theme(); labs() only sets label text
  labs(title = "Rank-Score: Vida e Saúde", fill = "Area de Estudo") +
  theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 20, face = "bold.italic"),
    axis.title.x = element_text(size = 20, face = "bold"),
    axis.title.y = element_text(size = 20, face = "bold"),
    axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12)
  )
# Supplementary boxplots comparing Brazil and Portugal (kept separate to
# decide on their usefulness)
# Preparation
# "Vida" panel: leftmost panel, so it is the one carrying the y-axis title.
# Rows are selected by the group column (L1) that melt() adds from the list
# names, rather than by hard-coded row positions.
Vida <- ggplot(
  subset(melt(data_compare), L1 == "Vida"),
  aes(fill = variable, y = value, x = variable)
) +
  # Whisker end caps
  stat_boxplot(geom = "errorbar", width = 0.15, lwd = 1.2) +
  geom_boxplot(width = 0.7, lwd = 1.2, fatten = 1) +
  scale_fill_jco() +
  scale_x_discrete(
    name = "\nPaíses", labels = c("Brasil", "Portugal"), expand = c(0.1, 0)
  ) +
  scale_y_continuous(
    name = "Percentual do CPP do 1o Lugar", limits = c(0, 100),
    breaks = c(0, 25, 50, 75, 100), expand = c(0, 0)
  ) +
  # Title styling belongs in theme(); labs() only sets label text
  labs(title = "Vida") +
  theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
    # White x title: drawn in white so only one panel shows a visible title
    axis.title.x = element_text(size = 20, face = "bold", color = "white"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12)
  )
# "Saúde" panel: the only panel whose x-axis title is drawn in a visible
# colour. Rows are selected by the group column (L1) that melt() adds from the
# list names, rather than by hard-coded row positions.
Saude <- ggplot(
  subset(melt(data_compare), L1 == "Saúde"),
  aes(fill = variable, y = value, x = variable)
) +
  # Whisker end caps
  stat_boxplot(geom = "errorbar", width = 0.15, lwd = 1.2) +
  geom_boxplot(width = 0.7, lwd = 1.2, fatten = 1) +
  scale_fill_jco() +
  scale_x_discrete(
    name = "\nPaíses", labels = c("Brasil", "Portugal"), expand = c(0.1, 0)
  ) +
  scale_y_continuous(
    name = NULL, limits = c(0, 100),
    breaks = c(0, 25, 50, 75, 100), expand = c(0, 0)
  ) +
  # Title styling belongs in theme(); labs() only sets label text
  labs(title = "Saúde") +
  theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
    axis.title.x = element_text(size = 20, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12)
  )
# "Humanas" panel. Rows are selected by the group column (L1) that melt() adds
# from the list names, rather than by hard-coded row positions.
Humanas <- ggplot(
  subset(melt(data_compare), L1 == "Humanas"),
  aes(fill = variable, y = value, x = variable)
) +
  # Whisker end caps
  stat_boxplot(geom = "errorbar", width = 0.15, lwd = 1.2) +
  geom_boxplot(width = 0.7, lwd = 1.2, fatten = 1) +
  scale_fill_jco() +
  scale_x_discrete(
    name = "\nPaíses", labels = c("Brasil", "Portugal"), expand = c(0.1, 0)
  ) +
  scale_y_continuous(
    name = NULL, limits = c(0, 100),
    breaks = c(0, 25, 50, 75, 100), expand = c(0, 0)
  ) +
  # Title styling belongs in theme(); labs() only sets label text
  labs(title = "Humanas") +
  theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
    # White x title: drawn in white so only one panel shows a visible title
    # (the stray empty argument of the original call is removed)
    axis.title.x = element_text(size = 20, face = "bold", color = "white"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12)
  )
# Draw the three comparison panels side by side on one page
multiplot(plotlist = list(Vida, Saude, Humanas), cols = 3)
########################################## |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment