Last active
November 1, 2019 12:40
-
-
Save eduardofox2/2f850cfa4e839eb8a7c538b1203753e3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##############Script R escrito por Eduardo G P Fox em 17/10/2019######################### | |
#Auxiliando colegas em seu manuscrito sobre cientometria | |
#Relativo a um artigo sobre o impacto relativo da ciencia brasileira em História, idealizado pelo Prof. Marcelo Hermes de Lima | |
#Publicado em: https://olivre.com.br/o-ocaso-da-pesquisa-em-historia-no-brasil | |
#Por favor, enviem comentarios sobre o presente script para meu site/conta pessoal | |
####### (https://ofoxofox.wixsite.com/research) & Twitter @ofoxofox ########## | |
#Pacotes R utilizados | |
library(ggplot2) | |
library(stringr) | |
library(plyr) | |
library(reshape2) | |
library(tidyverse) | |
library(ggsci) | |
##Caso falte algum acima no GUI local, utilizar install.packages() para rodar o script## | |
#Notar que algumas versoes do R não possuem ferramentas para calcular Standard Error, por motivos educacionais | |
#Inserindo nova função se abaixo, para atender pedido estetico do Prof. MHL | |
se <- function(x) sqrt(var(x)/length(x)) | |
#Nova função para plotar gráficos em conjunto | |
# Multiple plot function obtained from R CookBook Website | |
# | |
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) | |
# - cols: Number of columns in layout | |
# - layout: A matrix specifying the layout. If present, 'cols' is ignored. | |
# | |
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), | |
# then plot 1 will go in the upper left, 2 will go in the upper right, and | |
# 3 will go all the way across the bottom. | |
# | |
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { | |
library(grid) | |
# Make a list from the ... arguments and plotlist | |
plots <- c(list(...), plotlist) | |
numPlots = length(plots) | |
# If layout is NULL, then use 'cols' to determine layout | |
if (is.null(layout)) { | |
# Make the panel | |
# ncol: Number of columns of plots | |
# nrow: Number of rows needed, calculated from # of cols | |
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), | |
ncol = cols, nrow = ceiling(numPlots/cols)) | |
} | |
if (numPlots==1) { | |
print(plots[[1]]) | |
} else { | |
# Set up the page | |
grid.newpage() | |
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) | |
# Make each plot, in the correct location | |
for (i in 1:numPlots) { | |
# Get the i,j matrix positions of the regions that contain this subplot | |
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) | |
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, | |
layout.pos.col = matchidx$col)) | |
} | |
} | |
} | |
#####Dados obtidos pelo Prof. Marcelo Hermes de Lima de revistas de Historia no JCR####### | |
#Obtidos da SciMago, relativos a países/territorios selecionados para análise no triênio 2015-2018, conforme listados | |
#Abaixo temos "citable papers", "cited papers", "% citado relativo ao mais citado", | |
#"Citations Per Paper" e "cited" como total de citações recebidas. | |
citable<-data.frame( | |
UK =c(193, 124, 103, 117, 197), | |
USA=c(547, 171, 79, 131, 125), | |
BRA=c(51, 126, 128, 136, 94) | |
) | |
cited<-data.frame( | |
UK =c(155, 92, 69, 77, 198), | |
USA=c(468, 131, 57, 95, 93), | |
BRA=c(16, 13, 6, 10, 9) | |
) | |
cited_perc<-data.frame( | |
UK =c(80.3, 74.2, 67, 65.8, 100), | |
USA=c(85.6, 76.6, 72.2, 72.5, 74.4), | |
BRA=c(31.4, 2.4, 4.7, 7.4, 9.6) | |
) | |
CpP<-data.frame( | |
UK =c(3.42, 3.613, 1.971, 1.889, 3.964), | |
USA=c(3.347, 2.292, 2.696, 2.122, 2.792), | |
BRA=c(0.471, 0.103, 0.07, 0.074, 0.149) | |
) | |
cites<-data.frame( | |
UK =c(660, 448, 203, 221, 781), | |
USA=c(1831, 392, 213, 278, 349), | |
BRA=c(24, 13, 9, 10, 14) | |
) | |
Int_Col<-data.frame( | |
UK =c(16.4, 13.3, 47.1, 21.2, 23.4), | |
USA=c(56.6, 40, 17.4, 16.1, 21.1), | |
BRA=c(0, 2.9, 8.9, 17, 13.2) | |
) | |
perc_prim<-data.frame( | |
Por=c(32.4, 51.2, 59.9, 47.1), | |
Arg=c(47.1, 30.7, 32.3, 37.1), | |
Bra=c(17.6, 15, 16.8, 20.6) | |
) | |
#Série temporal de % de citações recebidas pelas revistas brasileiras selecionadas | |
#relativo ao mais citado no período | |
Temporal1<-data.frame( | |
Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018), | |
Perc_prim=c(30, 25.9, 34, 18, 18, 23.1, 27.5, 20.6, 16.8, 15, 17.6), | |
rank_score=c(0.8, 1.2, 1.8, 1.1, 0.9, 0.6, 1.6, 0.8, 1.2, 0.2, 1.2) | |
) | |
#Brasil x Argentina em Historia | |
#Numeros de artigos | |
Brasil_e_Argentina<-data.frame( | |
Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018), | |
BR=c(88, 156, 196, 227, 329, 349, 399, 413, 463, 469, 423), | |
AR=c(43, 64, 72, 106, 133, 146, 188, 200, 265, 295, 339) | |
) | |
#Relação de numero de artigos x impacto relativo (% CpP 1o lugar) do Brasil | |
Papers_vs_impacto<-data.frame( | |
Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018), | |
Numero=c(88, 156, 196, 227, 329, 349, 399, 413, 463, 469, 423), | |
Perc_Primeiro=c(30, 25.9, 34, 18, 18, 23.1, 27.5, 20.6, 16.8, 15, 17.6) | |
) | |
################### Plots dos Dados conforme estética definida pelo MHL ################## | |
#Formato em Barplots para o artigo de divulgação (Maioria dos Leitores ainda nao entende formas mais claras de apresentar os dados) | |
#CPP - Citations per Paper | |
CPP<-ggplot(melt(CpP), | |
aes(x=variable, | |
y=value, | |
fill=variable))+ | |
geom_bar(position = 'dodge', | |
colour = 'white', | |
stat = 'summary', | |
alpha = 0.8, | |
fun.y = 'mean', | |
width = 0.7) + | |
geom_point(aes(x = variable), | |
colour = 'black', | |
shape = 19, | |
position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5), | |
size = 1.5)+ | |
scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+ | |
scale_y_continuous(name= NULL, limits=c(0,5), breaks=c(0,1,2,3,4,5), expand = c(0, 0))+ | |
labs(title = "CPP", font="bold", size=9)+ | |
scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+ | |
theme_light()+ | |
theme( | |
legend.position = "none", | |
plot.title = element_text(color="red", size=16, face="bold.italic"), | |
axis.title.x = element_text(size=20, face="bold", ,color="white"), | |
axis.title.y = element_text(size=14, face="bold"), | |
axis.text.x=element_text(size=14), | |
axis.text.y=element_text(size=12)) | |
#%Cited - percentual relativo ao mais citado | |
Cited<-ggplot(melt(cited_perc), | |
aes(x=variable, | |
y=value, | |
fill=variable))+ | |
geom_bar(position = 'dodge', | |
colour = 'white', | |
stat = 'summary', | |
alpha = 0.8, | |
fun.y = 'mean', | |
width = 0.7) + | |
geom_point(aes(x = variable), | |
colour = 'black', | |
shape = 19, | |
position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5), | |
size = 1.5)+ | |
scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+ | |
scale_y_continuous(name= NULL, limits=c(0,100), breaks=c(0,25,50,75,100), expand = c(0, 0))+ | |
labs(title = "% artigos citados", font="% cited", size=9)+ | |
scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+ | |
theme_light()+ | |
theme( | |
legend.position = "none", | |
plot.title = element_text(color="red", size=16, face="bold.italic"), | |
axis.title.x = element_text(size=20, face="bold", ,color="white"), | |
axis.title.y = element_text(size=14, face="bold"), | |
axis.text.x=element_text(size=14), | |
axis.text.y=element_text(size=12)) | |
#Colaboração Internacional - medido por ...? [Note to self: Perguntar ao MHL e/ou Volpato] | |
Colab<-ggplot(melt(Int_Col), | |
aes(x=variable, | |
y=value, | |
fill=variable))+ | |
geom_bar(position = 'dodge', | |
colour = 'white', | |
stat = 'summary', | |
alpha = 0.8, | |
fun.y = 'mean', | |
width = 0.7) + | |
geom_point(aes(x = variable), | |
colour = 'black', | |
shape = 19, | |
position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5), | |
size = 1.5)+ | |
scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+ | |
scale_y_continuous(name= NULL, limits=c(0,60), breaks=c(0,15,30,45,60), expand = c(0.0, 0))+ | |
labs(title = "Internacionalização", font="bold", size=9)+ | |
scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+ | |
theme_light()+ | |
theme( | |
legend.position = "none", | |
plot.title = element_text(color="red", size=16, face="bold.italic"), | |
axis.title.x = element_text(size=20, face="bold", ,color="white"), | |
axis.title.y = element_text(size=14, face="bold"), | |
axis.text.x=element_text(size=14), | |
axis.text.y=element_text(size=12)) | |
#Plotando os acima em conjunto usando script descrito no início | |
multiplot(CPP, Cited, Colab, cols=3) | |
#Barplot % do primeiro lugar | |
barplot<-ggplot(melt(perc_prim), | |
aes(x=variable, | |
y=value, | |
fill=variable))+ | |
stat_summary(fun.y = mean, geom = "bar", size=1, col="black", width=0.5)+ | |
stat_summary(fun.ymin = function(x) mean(x) - 0, | |
fun.ymax = function(x) mean(x) + se(x), | |
geom = "errorbar", size=1, width=0.2)+ | |
geom_point(aes(x = variable), | |
colour = 'black', | |
shape = 19, | |
position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5), | |
size = 3)+ | |
scale_x_discrete(name= '\nPaíses', labels = c("Portugal", "Argentina", "Brasil"), expand = c(0.1, 0))+ | |
scale_y_continuous(name= NULL, limits=c(0,100), breaks=c(0,25,50,75,100), expand = c(0.0, 0))+ | |
labs(title = "% do CPP do Primeiro Lugar (2015-2018)", font="bold", size=9)+ | |
scale_fill_manual(values=c("#ff0000", "#75aadb", "#009C3B"))+ | |
theme_light()+ | |
theme( | |
legend.position = "none", | |
plot.title = element_text(color="red", size=16, face="bold.italic"), | |
axis.title.x = element_blank(), | |
axis.title.y = element_text(size=14, face="bold"), | |
axis.text.x=element_text(size=14), | |
axis.text.y=element_text(size=12)) | |
#Plots de nuvem de pontos para regressão | |
#% do CPP primeiro colocado | |
Perc_prim<-ggplot(Temporal1[,1:2], aes(y=Perc_prim, x=Ano))+ | |
geom_smooth(method = "loess", alpha = 0.05, size = 1, span = 1) + | |
geom_point(size = 3.5)+ | |
scale_x_continuous(name= NULL, limits=c(2007,2019), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0.2))+ | |
scale_y_continuous(name= NULL, limits=c(0,50), breaks=c(0,15,30,45), expand = c(0, 0))+ | |
theme_light()+ | |
labs(title = "% do CPP do Primeiro Lugar", font="bold", size=16)+ | |
labs(caption="\n* Escala do eixo y cortada em 50% para enfatizar tendência da curva", size=9)+ | |
theme( | |
legend.position = "none", | |
plot.title = element_text(color="red", size=16, face="bold.italic"), | |
axis.title.x = element_text(size=20, face="bold", ,color="white"), | |
axis.title.y = element_text(size=14, face="bold"), | |
axis.text.x=element_text(size=14), | |
axis.text.y=element_text(size=12)) | |
#rank-score | |
Rank<-ggplot(Temporal1[,c(1,3)], aes(y=rank_score, x=Ano))+ | |
geom_smooth(method='lm',formula=y~x, se=FALSE)+ | |
geom_point(size = 3.5)+ | |
scale_x_continuous(name= NULL, limits=c(2007.5,2018.5), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0))+ | |
scale_y_continuous(name= NULL, limits=c(0,10), breaks=c(2,4,6,8,10), expand = c(0, 0))+ | |
theme_light()+ | |
labs(title = "Rank Score", font="bold", size=16)+ | |
theme( | |
legend.position = "none", | |
plot.title = element_text(color="red", size=16, face="bold.italic"), | |
axis.title.x = element_text(size=20, face="bold", ,color="white"), | |
axis.title.y = element_text(size=14, face="bold"), | |
axis.text.x=element_text(size=14), | |
axis.text.y=element_text(size=12)) | |
#Plotando em conjunto | |
multiplot(Rank, Perc_prim, cols=2) | |
#Relacionando Brasil e Argentina | |
Brasil_Argentina<-ggplot(Brasil_e_Argentina)+ | |
geom_point(aes(y=BR, x=Ano), colour="#009C3B", size = 3)+ | |
geom_smooth(aes(y=BR, x=Ano), method = "loess", se = FALSE, size = 1, span = 1, colour="#009C3B") + | |
geom_point(aes(y=AR, x=Ano), colour="#75aadb", size = 3)+ | |
geom_smooth(aes(y=AR, x=Ano), method = "loess", se = FALSE, size = 1, span = 1, colour="#75aadb") + | |
scale_x_continuous(name= NULL, limits=c(2007,2019), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0.2))+ | |
scale_y_continuous(name= NULL, limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+ | |
theme_light()+ | |
labs(title = "Número de artigos publicados", font="bold", size=16)+ | |
theme( | |
legend.position = "bottom", | |
plot.title = element_text(color="red", size=16, face="bold.italic"), | |
axis.title.x = element_text(size=20, face="bold", ,color="white"), | |
axis.title.y = element_text(size=14, face="bold"), | |
axis.text.x=element_text(size=14), | |
axis.text.y=element_text(size=12)) | |
#Plotando em conjunto conforme script descrito no início | |
multiplot(barplot, Brasil_Argentina, cols=2) | |
# Figura final relacionando numero de papers e impacto por citations | |
#Dificuldade encontrada em automaticamente inserir a formula de regressao | |
#Primeira tentativa usando plot automatico de parametros de regressao: | |
#Solucão parcial obtida em https://stackoverflow.com/questions/7549694/add-regression-line-equation-and-r2-on-graph | |
lm_eqn <- function(df){ | |
m <- lm(y ~ x, df); | |
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, | |
list(a = format(unname(coef(m)[1]), digits = 2), | |
b = format(unname(coef(m)[2]), digits = 2), | |
r2 = format(summary(m)$r.squared, digits = 3))) | |
as.character(as.expression(eq)); | |
} | |
#Vide que codigo fornecido nao funciona bem, pois exige um formato especifico de dataframe e resulta em slope invertido | |
#[Quem souber corrigir, por favor edite o script] | |
#Renomeando dados para encaixar formato na formula acima | |
df<-Papers_vs_impacto | |
names(df)<-c("Ano", "x", "y") | |
#Plotando de acordo com primeira tentativa | |
ggplot(df, aes(y=y, x=x))+ | |
geom_point(size=3)+ | |
geom_smooth(method = "lm", se = FALSE)+ | |
geom_text(x = 350, y = 7, label = lm_eqn(df), parse = TRUE)+ | |
scale_x_continuous(name= NULL, limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+ | |
scale_y_continuous(name= NULL, limits=c(0,50), breaks=c(0,15,30,45), expand = c(0, 0))+ | |
theme_light()+ | |
labs(title = "Papers (x) versus Impacto (Y)", font="bold", size=16)+ | |
theme( | |
legend.position = "none", | |
plot.title = element_text(color="red", size=16, face="bold.italic"), | |
axis.title.x = element_text(size=20, face="bold", ,color="white"), | |
axis.title.y = element_text(size=14, face="bold"), | |
axis.text.x=element_text(size=14), | |
axis.text.y=element_text(size=12)) | |
#Notar output estranho acima. Ajustado esteticamente abaixo como "gambiarra". Fico no aguardo de sugestoes. | |
#Parametros conforme automaticamente providos pelo script acima: | |
lb1<-paste("R^2 == ", 0.463) | |
lb2<-"y = - 0.032x + 33" | |
#Plotando manualmente com estes parametros | |
ggplot(Papers_vs_impacto, aes(y=Perc_Primeiro, x=Numero))+ | |
geom_point(size=3)+ | |
geom_smooth(method = "lm", se = FALSE)+ | |
annotate("text", x = 350, y = 4, label =lb1, fontface=3, parse=TRUE)+ | |
annotate("text", x = 350, y = 7, label = lb2)+ | |
scale_x_continuous(limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+ | |
scale_y_continuous(limits=c(0,50), breaks=c(15,30,45), expand = c(0, 0))+ | |
theme_light()+ | |
labs(title = "Correlação entre Quantidade vs. Impacto", y = "% do CPP do Primeiro Lugar", x = "Número de artigos publicados")+ | |
theme( | |
legend.position = "none", | |
plot.title = element_text(color="red", size=16, face="bold.italic"), | |
axis.title.x = element_text(size=12, face="bold"), | |
axis.title.y = element_text(size=13, face="bold"), | |
axis.text.x=element_text(size=14), | |
axis.text.y=element_text(size=12)) | |
################################################################################################################################### |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Por favor, se alguem souber como corrigir o problema discutido no ultimo plot, deixar sugestoes e/ou edits.