Skip to content

Instantly share code, notes, and snippets.

@eduardofox2
Last active November 1, 2019 12:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eduardofox2/2f850cfa4e839eb8a7c538b1203753e3 to your computer and use it in GitHub Desktop.
Save eduardofox2/2f850cfa4e839eb8a7c538b1203753e3 to your computer and use it in GitHub Desktop.
##############Script R escrito por Eduardo G P Fox em 17/10/2019#########################
#Auxiliando colegas em seu manuscrito sobre cientometria
#Relativo a um artigo sobre o impacto relativo da ciencia brasileira em História, idealizado pelo Prof. Marcelo Hermes de Lima
#Publicado em: https://olivre.com.br/o-ocaso-da-pesquisa-em-historia-no-brasil
#Por favor, enviem comentarios sobre o presente script para meu site/conta pessoal
####### (https://ofoxofox.wixsite.com/research) & Twitter @ofoxofox ##########
#Pacotes R utilizados
library(ggplot2)
library(stringr)
library(plyr)
library(reshape2)
library(tidyverse)
library(ggsci)
##Caso falte algum acima no GUI local, utilizar install.packages() para rodar o script##
#Notar que algumas versoes do R não possuem ferramentas para calcular Standard Error, por motivos educacionais
#Inserindo nova função se abaixo, para atender pedido estetico do Prof. MHL
se <- function(x) sqrt(var(x)/length(x))
#Nova função para plotar gráficos em conjunto
# Multiple plot function obtained from R CookBook Website
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
#####Dados obtidos pelo Prof. Marcelo Hermes de Lima de revistas de Historia no JCR#######
#Obtidos da SciMago, relativos a países/territorios selecionados para análise no triênio 2015-2018, conforme listados
#Abaixo temos "citable papers", "cited papers", "% citado relativo ao mais citado",
#"Citations Per Paper" e "cited" como total de citações recebidas.
citable<-data.frame(
UK =c(193, 124, 103, 117, 197),
USA=c(547, 171, 79, 131, 125),
BRA=c(51, 126, 128, 136, 94)
)
cited<-data.frame(
UK =c(155, 92, 69, 77, 198),
USA=c(468, 131, 57, 95, 93),
BRA=c(16, 13, 6, 10, 9)
)
cited_perc<-data.frame(
UK =c(80.3, 74.2, 67, 65.8, 100),
USA=c(85.6, 76.6, 72.2, 72.5, 74.4),
BRA=c(31.4, 2.4, 4.7, 7.4, 9.6)
)
CpP<-data.frame(
UK =c(3.42, 3.613, 1.971, 1.889, 3.964),
USA=c(3.347, 2.292, 2.696, 2.122, 2.792),
BRA=c(0.471, 0.103, 0.07, 0.074, 0.149)
)
cites<-data.frame(
UK =c(660, 448, 203, 221, 781),
USA=c(1831, 392, 213, 278, 349),
BRA=c(24, 13, 9, 10, 14)
)
Int_Col<-data.frame(
UK =c(16.4, 13.3, 47.1, 21.2, 23.4),
USA=c(56.6, 40, 17.4, 16.1, 21.1),
BRA=c(0, 2.9, 8.9, 17, 13.2)
)
perc_prim<-data.frame(
Por=c(32.4, 51.2, 59.9, 47.1),
Arg=c(47.1, 30.7, 32.3, 37.1),
Bra=c(17.6, 15, 16.8, 20.6)
)
#Série temporal de % de citações recebidas pelas revistas brasileiras selecionadas
#relativo ao mais citado no período
Temporal1<-data.frame(
Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018),
Perc_prim=c(30, 25.9, 34, 18, 18, 23.1, 27.5, 20.6, 16.8, 15, 17.6),
rank_score=c(0.8, 1.2, 1.8, 1.1, 0.9, 0.6, 1.6, 0.8, 1.2, 0.2, 1.2)
)
#Brasil x Argentina em Historia
#Numeros de artigos
Brasil_e_Argentina<-data.frame(
Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018),
BR=c(88, 156, 196, 227, 329, 349, 399, 413, 463, 469, 423),
AR=c(43, 64, 72, 106, 133, 146, 188, 200, 265, 295, 339)
)
#Relação de numero de artigos x impacto relativo (% CpP 1o lugar) do Brasil
Papers_vs_impacto<-data.frame(
Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018),
Numero=c(88, 156, 196, 227, 329, 349, 399, 413, 463, 469, 423),
Perc_Primeiro=c(30, 25.9, 34, 18, 18, 23.1, 27.5, 20.6, 16.8, 15, 17.6)
)
################### Plots dos Dados conforme estética definida pelo MHL ##################
#Formato em Barplots para o artigo de divulgação (Maioria dos Leitores ainda nao entende formas mais claras de apresentar os dados)
#CPP - Citations per Paper
CPP<-ggplot(melt(CpP),
aes(x=variable,
y=value,
fill=variable))+
geom_bar(position = 'dodge',
colour = 'white',
stat = 'summary',
alpha = 0.8,
fun.y = 'mean',
width = 0.7) +
geom_point(aes(x = variable),
colour = 'black',
shape = 19,
position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
size = 1.5)+
scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+
scale_y_continuous(name= NULL, limits=c(0,5), breaks=c(0,1,2,3,4,5), expand = c(0, 0))+
labs(title = "CPP", font="bold", size=9)+
scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+
theme_light()+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))
#%Cited - percentual relativo ao mais citado
Cited<-ggplot(melt(cited_perc),
aes(x=variable,
y=value,
fill=variable))+
geom_bar(position = 'dodge',
colour = 'white',
stat = 'summary',
alpha = 0.8,
fun.y = 'mean',
width = 0.7) +
geom_point(aes(x = variable),
colour = 'black',
shape = 19,
position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
size = 1.5)+
scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+
scale_y_continuous(name= NULL, limits=c(0,100), breaks=c(0,25,50,75,100), expand = c(0, 0))+
labs(title = "% artigos citados", font="% cited", size=9)+
scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+
theme_light()+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))
#Colaboração Internacional - medido por ...? [Note to self: Perguntar ao MHL e/ou Volpato]
Colab<-ggplot(melt(Int_Col),
aes(x=variable,
y=value,
fill=variable))+
geom_bar(position = 'dodge',
colour = 'white',
stat = 'summary',
alpha = 0.8,
fun.y = 'mean',
width = 0.7) +
geom_point(aes(x = variable),
colour = 'black',
shape = 19,
position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
size = 1.5)+
scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+
scale_y_continuous(name= NULL, limits=c(0,60), breaks=c(0,15,30,45,60), expand = c(0.0, 0))+
labs(title = "Internacionalização", font="bold", size=9)+
scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+
theme_light()+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))
#Plotando os acima em conjunto usando script descrito no início
multiplot(CPP, Cited, Colab, cols=3)
#Barplot % do primeiro lugar
barplot<-ggplot(melt(perc_prim),
aes(x=variable,
y=value,
fill=variable))+
stat_summary(fun.y = mean, geom = "bar", size=1, col="black", width=0.5)+
stat_summary(fun.ymin = function(x) mean(x) - 0,
fun.ymax = function(x) mean(x) + se(x),
geom = "errorbar", size=1, width=0.2)+
geom_point(aes(x = variable),
colour = 'black',
shape = 19,
position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
size = 3)+
scale_x_discrete(name= '\nPaíses', labels = c("Portugal", "Argentina", "Brasil"), expand = c(0.1, 0))+
scale_y_continuous(name= NULL, limits=c(0,100), breaks=c(0,25,50,75,100), expand = c(0.0, 0))+
labs(title = "% do CPP do Primeiro Lugar (2015-2018)", font="bold", size=9)+
scale_fill_manual(values=c("#ff0000", "#75aadb", "#009C3B"))+
theme_light()+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_blank(),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))
#Plots de nuvem de pontos para regressão
#% do CPP primeiro colocado
Perc_prim<-ggplot(Temporal1[,1:2], aes(y=Perc_prim, x=Ano))+
geom_smooth(method = "loess", alpha = 0.05, size = 1, span = 1) +
geom_point(size = 3.5)+
scale_x_continuous(name= NULL, limits=c(2007,2019), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0.2))+
scale_y_continuous(name= NULL, limits=c(0,50), breaks=c(0,15,30,45), expand = c(0, 0))+
theme_light()+
labs(title = "% do CPP do Primeiro Lugar", font="bold", size=16)+
labs(caption="\n* Escala do eixo y cortada em 50% para enfatizar tendência da curva", size=9)+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))
#rank-score
Rank<-ggplot(Temporal1[,c(1,3)], aes(y=rank_score, x=Ano))+
geom_smooth(method='lm',formula=y~x, se=FALSE)+
geom_point(size = 3.5)+
scale_x_continuous(name= NULL, limits=c(2007.5,2018.5), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0))+
scale_y_continuous(name= NULL, limits=c(0,10), breaks=c(2,4,6,8,10), expand = c(0, 0))+
theme_light()+
labs(title = "Rank Score", font="bold", size=16)+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))
#Plotando em conjunto
multiplot(Rank, Perc_prim, cols=2)
#Relacionando Brasil e Argentina
Brasil_Argentina<-ggplot(Brasil_e_Argentina)+
geom_point(aes(y=BR, x=Ano), colour="#009C3B", size = 3)+
geom_smooth(aes(y=BR, x=Ano), method = "loess", se = FALSE, size = 1, span = 1, colour="#009C3B") +
geom_point(aes(y=AR, x=Ano), colour="#75aadb", size = 3)+
geom_smooth(aes(y=AR, x=Ano), method = "loess", se = FALSE, size = 1, span = 1, colour="#75aadb") +
scale_x_continuous(name= NULL, limits=c(2007,2019), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0.2))+
scale_y_continuous(name= NULL, limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+
theme_light()+
labs(title = "Número de artigos publicados", font="bold", size=16)+
theme(
legend.position = "bottom",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))
#Plotando em conjunto conforme script descrito no início
multiplot(barplot, Brasil_Argentina, cols=2)
# Figura final relacionando numero de papers e impacto por citations
#Dificuldade encontrada em automaticamente inserir a formula de regressao
#Primeira tentativa usando plot automatico de parametros de regressao:
#Solucão parcial obtida em https://stackoverflow.com/questions/7549694/add-regression-line-equation-and-r2-on-graph
lm_eqn <- function(df){
m <- lm(y ~ x, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(unname(coef(m)[1]), digits = 2),
b = format(unname(coef(m)[2]), digits = 2),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}
#Vide que codigo fornecido nao funciona bem, pois exige um formato especifico de dataframe e resulta em slope invertido
#[Quem souber corrigir, por favor edite o script]
#Renomeando dados para encaixar formato na formula acima
df<-Papers_vs_impacto
names(df)<-c("Ano", "x", "y")
#Plotando de acordo com primeira tentativa
ggplot(df, aes(y=y, x=x))+
geom_point(size=3)+
geom_smooth(method = "lm", se = FALSE)+
geom_text(x = 350, y = 7, label = lm_eqn(df), parse = TRUE)+
scale_x_continuous(name= NULL, limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+
scale_y_continuous(name= NULL, limits=c(0,50), breaks=c(0,15,30,45), expand = c(0, 0))+
theme_light()+
labs(title = "Papers (x) versus Impacto (Y)", font="bold", size=16)+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))
#Notar output estranho acima. Ajustado esteticamente abaixo como "gambiarra". Fico no aguardo de sugestoes.
#Parametros conforme automaticamente providos pelo script acima:
lb1<-paste("R^2 == ", 0.463)
lb2<-"y = - 0.032x + 33"
#Plotando manualmente com estes parametros
ggplot(Papers_vs_impacto, aes(y=Perc_Primeiro, x=Numero))+
geom_point(size=3)+
geom_smooth(method = "lm", se = FALSE)+
annotate("text", x = 350, y = 4, label =lb1, fontface=3, parse=TRUE)+
annotate("text", x = 350, y = 7, label = lb2)+
scale_x_continuous(limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+
scale_y_continuous(limits=c(0,50), breaks=c(15,30,45), expand = c(0, 0))+
theme_light()+
labs(title = "Correlação entre Quantidade vs. Impacto", y = "% do CPP do Primeiro Lugar", x = "Número de artigos publicados")+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=12, face="bold"),
axis.title.y = element_text(size=13, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))
###################################################################################################################################
@eduardofox2
Copy link
Author

Por favor, se alguem souber como corrigir o problema discutido no ultimo plot, deixar sugestoes e/ou edits.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment