eduardofox2/Script_Historia_MHL.txt

## Script_Historia_MHL.txt
##############Script R escrito por Eduardo G P Fox em 17/10/2019#########################
#Auxiliando colegas em seu manuscrito sobre cientometria
#Relativo a um artigo sobre o impacto relativo da ciencia brasileira em História, idealizado pelo Prof. Marcelo Hermes de Lima
#Publicado em: https://olivre.com.br/o-ocaso-da-pesquisa-em-historia-no-brasil

#Por favor, enviem comentarios sobre o presente script para meu site/conta pessoal
#######     (https://ofoxofox.wixsite.com/research) & Twitter @ofoxofox    ##########

#Pacotes R utilizados
library(ggplot2)
library(stringr)
library(plyr)
library(reshape2)
library(tidyverse)
library(ggsci)
##Caso falte algum acima no GUI local, utilizar install.packages() para rodar o script##

#Notar que algumas versoes do R não possuem ferramentas para calcular Standard Error, por motivos educacionais
#Inserindo nova função se abaixo, para atender pedido estetico do Prof. MHL
se <- function(x) sqrt(var(x)/length(x))

#Nova função para plotar gráficos em conjunto
# Multiple plot function obtained from R CookBook Website
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

#####Dados obtidos pelo Prof. Marcelo Hermes de Lima de revistas de Historia no JCR#######
#Obtidos da SciMago, relativos a países/territorios selecionados para análise no triênio 2015-2018, conforme listados
#Abaixo temos "citable papers", "cited papers", "% citado relativo ao mais citado",
#"Citations Per Paper" e "cited" como total de citações recebidas.

citable<-data.frame(
UK =c(193, 124, 103, 117, 197),
USA=c(547, 171, 79, 131, 125),
BRA=c(51, 126, 128, 136, 94)
)

cited<-data.frame(
UK =c(155, 92, 69, 77, 198),
USA=c(468, 131, 57, 95, 93),
BRA=c(16, 13, 6, 10, 9)
)

cited_perc<-data.frame(
UK =c(80.3, 74.2, 67, 65.8, 100),
USA=c(85.6, 76.6, 72.2, 72.5, 74.4),
BRA=c(31.4, 2.4, 4.7, 7.4, 9.6)
)

CpP<-data.frame(
UK =c(3.42, 3.613, 1.971, 1.889, 3.964),
USA=c(3.347, 2.292, 2.696, 2.122, 2.792),
BRA=c(0.471, 0.103, 0.07, 0.074, 0.149)
)

cites<-data.frame(
UK =c(660, 448, 203, 221, 781),
USA=c(1831, 392, 213, 278, 349),
BRA=c(24, 13, 9, 10, 14)
)

Int_Col<-data.frame(
UK =c(16.4, 13.3, 47.1, 21.2, 23.4),
USA=c(56.6, 40, 17.4, 16.1, 21.1),
BRA=c(0, 2.9, 8.9, 17, 13.2)
)

perc_prim<-data.frame(
Por=c(32.4, 51.2, 59.9, 47.1),
Arg=c(47.1, 30.7, 32.3, 37.1),
Bra=c(17.6, 15, 16.8, 20.6)
)

#Série temporal de % de citações recebidas pelas revistas brasileiras selecionadas
#relativo ao mais citado no período

Temporal1<-data.frame(
Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018),
Perc_prim=c(30, 25.9, 34, 18, 18, 23.1, 27.5, 20.6, 16.8, 15, 17.6),
rank_score=c(0.8, 1.2, 1.8, 1.1, 0.9, 0.6, 1.6, 0.8, 1.2, 0.2, 1.2)
)

#Brasil x Argentina em Historia
#Numeros de artigos
Brasil_e_Argentina<-data.frame(
Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018),
BR=c(88, 156, 196, 227, 329, 349, 399, 413, 463, 469, 423),
AR=c(43, 64, 72, 106, 133, 146, 188, 200, 265, 295, 339)
)

#Relação de numero de artigos x impacto relativo (% CpP 1o lugar) do Brasil
Papers_vs_impacto<-data.frame(
Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018),
Numero=c(88, 156, 196, 227, 329, 349, 399, 413, 463, 469, 423),
Perc_Primeiro=c(30, 25.9, 34, 18, 18, 23.1, 27.5, 20.6, 16.8, 15, 17.6)
)

################### Plots dos Dados conforme estética definida pelo MHL ##################

#Formato em Barplots para o artigo de divulgação (Maioria dos Leitores ainda nao entende formas mais claras de apresentar os dados)
#CPP - Citations per Paper
CPP<-ggplot(melt(CpP),
 aes(x=variable,
  y=value,
   fill=variable))+
geom_bar(position = 'dodge',
 colour = 'white',
  stat = 'summary',
   alpha = 0.8,
    fun.y = 'mean',
     width = 0.7) +
geom_point(aes(x = variable),
 colour = 'black',
  shape = 19,
   position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
    size = 1.5)+
scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+
scale_y_continuous(name= NULL, limits=c(0,5), breaks=c(0,1,2,3,4,5), expand = c(0, 0))+
labs(title = "CPP", font="bold", size=9)+
scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+
theme_light()+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))

#%Cited - percentual relativo ao mais citado
Cited<-ggplot(melt(cited_perc),
 aes(x=variable,
  y=value,
   fill=variable))+
geom_bar(position = 'dodge',
 colour = 'white',
  stat = 'summary',
   alpha = 0.8,
    fun.y = 'mean',
     width = 0.7) +
geom_point(aes(x = variable),
 colour = 'black',
  shape = 19,
   position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
    size = 1.5)+
scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+
scale_y_continuous(name= NULL, limits=c(0,100), breaks=c(0,25,50,75,100), expand = c(0, 0))+
labs(title = "% artigos citados", font="% cited", size=9)+
scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+
theme_light()+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))

#Colaboração Internacional - medido por ...? [Note to self: Perguntar ao MHL e/ou Volpato]
Colab<-ggplot(melt(Int_Col),
 aes(x=variable,
  y=value,
   fill=variable))+
geom_bar(position = 'dodge',
 colour = 'white',
  stat = 'summary',
   alpha = 0.8,
    fun.y = 'mean',
     width = 0.7) +
geom_point(aes(x = variable),
 colour = 'black',
  shape = 19,
   position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
    size = 1.5)+
scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+
scale_y_continuous(name= NULL, limits=c(0,60), breaks=c(0,15,30,45,60), expand = c(0.0, 0))+
labs(title = "Internacionalização", font="bold", size=9)+
scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+
theme_light()+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))

#Plotando os acima em conjunto usando script descrito no início
multiplot(CPP, Cited, Colab, cols=3)

#Barplot % do primeiro lugar
barplot<-ggplot(melt(perc_prim),
 aes(x=variable,
  y=value,
   fill=variable))+
stat_summary(fun.y = mean, geom = "bar", size=1, col="black", width=0.5)+
                     stat_summary(fun.ymin = function(x) mean(x) - 0,
                                  fun.ymax = function(x) mean(x) + se(x),
                                  geom = "errorbar", size=1, width=0.2)+
geom_point(aes(x = variable),
 colour = 'black',
  shape = 19,
   position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
    size = 3)+
scale_x_discrete(name= '\nPaíses', labels = c("Portugal", "Argentina", "Brasil"), expand = c(0.1, 0))+
scale_y_continuous(name= NULL, limits=c(0,100), breaks=c(0,25,50,75,100), expand = c(0.0, 0))+
labs(title = "% do CPP do Primeiro Lugar (2015-2018)", font="bold", size=9)+
scale_fill_manual(values=c("#ff0000", "#75aadb", "#009C3B"))+
theme_light()+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_blank(),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))


#Plots de nuvem de pontos para regressão
#% do CPP primeiro colocado
Perc_prim<-ggplot(Temporal1[,1:2], aes(y=Perc_prim, x=Ano))+
geom_smooth(method = "loess", alpha = 0.05, size = 1, span = 1) +
geom_point(size = 3.5)+
scale_x_continuous(name= NULL, limits=c(2007,2019), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0.2))+
scale_y_continuous(name= NULL, limits=c(0,50), breaks=c(0,15,30,45), expand = c(0, 0))+
theme_light()+
labs(title = "% do CPP do Primeiro Lugar", font="bold", size=16)+
labs(caption="\n* Escala do eixo y cortada em 50% para enfatizar tendência da curva", size=9)+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))

#rank-score
Rank<-ggplot(Temporal1[,c(1,3)], aes(y=rank_score, x=Ano))+
geom_smooth(method='lm',formula=y~x, se=FALSE)+
geom_point(size = 3.5)+
scale_x_continuous(name= NULL, limits=c(2007.5,2018.5), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0))+
scale_y_continuous(name= NULL, limits=c(0,10), breaks=c(2,4,6,8,10), expand = c(0, 0))+
theme_light()+
labs(title = "Rank Score", font="bold", size=16)+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))

#Plotando em conjunto
multiplot(Rank, Perc_prim, cols=2)

#Relacionando Brasil e Argentina
Brasil_Argentina<-ggplot(Brasil_e_Argentina)+
geom_point(aes(y=BR, x=Ano), colour="#009C3B", size = 3)+
geom_smooth(aes(y=BR, x=Ano), method = "loess", se = FALSE, size = 1, span = 1, colour="#009C3B") +
geom_point(aes(y=AR, x=Ano), colour="#75aadb", size = 3)+
geom_smooth(aes(y=AR, x=Ano), method = "loess", se = FALSE, size = 1, span = 1, colour="#75aadb") +
scale_x_continuous(name= NULL, limits=c(2007,2019), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0.2))+
scale_y_continuous(name= NULL, limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+
theme_light()+
labs(title = "Número de artigos publicados", font="bold", size=16)+
theme(
legend.position = "bottom",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))

#Plotando em conjunto conforme script descrito no início
multiplot(barplot, Brasil_Argentina, cols=2)

# Figura final relacionando numero de papers e impacto por citations
#Dificuldade encontrada em automaticamente inserir a formula de regressao
#Primeira tentativa usando plot automatico de parametros de regressao:
#Solucão parcial obtida em https://stackoverflow.com/questions/7549694/add-regression-line-equation-and-r2-on-graph

lm_eqn <- function(df){
    m <- lm(y ~ x, df);
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
         list(a = format(unname(coef(m)[1]), digits = 2),
              b = format(unname(coef(m)[2]), digits = 2),
             r2 = format(summary(m)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}

#Vide que codigo fornecido nao funciona bem, pois exige um formato especifico de dataframe e resulta em slope invertido
#[Quem souber corrigir, por favor edite o script]
#Renomeando dados para encaixar formato na formula acima
df<-Papers_vs_impacto
names(df)<-c("Ano", "x", "y")

#Plotando de acordo com primeira tentativa
ggplot(df, aes(y=y, x=x))+
geom_point(size=3)+
geom_smooth(method = "lm", se = FALSE)+
geom_text(x = 350, y = 7, label = lm_eqn(df), parse = TRUE)+
scale_x_continuous(name= NULL, limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+
scale_y_continuous(name= NULL, limits=c(0,50), breaks=c(0,15,30,45), expand = c(0, 0))+
theme_light()+
labs(title = "Papers (x) versus Impacto (Y)", font="bold", size=16)+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=20, face="bold", ,color="white"),
axis.title.y = element_text(size=14, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))

#Notar output estranho acima. Ajustado esteticamente abaixo como "gambiarra". Fico no aguardo de sugestoes.
#Parametros conforme automaticamente providos pelo script acima:
lb1<-paste("R^2 == ", 0.463)
lb2<-"y = - 0.032x + 33"

#Plotando manualmente com estes parametros
ggplot(Papers_vs_impacto, aes(y=Perc_Primeiro, x=Numero))+
geom_point(size=3)+
geom_smooth(method = "lm", se = FALSE)+
annotate("text", x = 350, y = 4, label =lb1, fontface=3, parse=TRUE)+
annotate("text", x = 350, y = 7, label = lb2)+
scale_x_continuous(limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+
scale_y_continuous(limits=c(0,50), breaks=c(15,30,45), expand = c(0, 0))+
theme_light()+
labs(title = "Correlação entre Quantidade vs. Impacto", y = "% do CPP do Primeiro Lugar", x = "Número de artigos publicados")+
theme(
legend.position = "none",
plot.title = element_text(color="red", size=16, face="bold.italic"),
axis.title.x = element_text(size=12, face="bold"),
axis.title.y = element_text(size=13, face="bold"),
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=12))

###################################################################################################################################
	##############Script R escrito por Eduardo G P Fox em 17/10/2019#########################
	#Auxiliando colegas em seu manuscrito sobre cientometria
	#Relativo a um artigo sobre o impacto relativo da ciencia brasileira em História, idealizado pelo Prof. Marcelo Hermes de Lima
	#Publicado em: https://olivre.com.br/o-ocaso-da-pesquisa-em-historia-no-brasil

	#Por favor, enviem comentarios sobre o presente script para meu site/conta pessoal
	####### (https://ofoxofox.wixsite.com/research) & Twitter @ofoxofox ##########

	#Pacotes R utilizados
	library(ggplot2)
	library(stringr)
	library(plyr)
	library(reshape2)
	library(tidyverse)
	library(ggsci)
	##Caso falte algum acima no GUI local, utilizar install.packages() para rodar o script##

	#Notar que algumas versoes do R não possuem ferramentas para calcular Standard Error, por motivos educacionais
	#Inserindo nova função se abaixo, para atender pedido estetico do Prof. MHL
	se <- function(x) sqrt(var(x)/length(x))

	#Nova função para plotar gráficos em conjunto
	# Multiple plot function obtained from R CookBook Website
	#
	# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
	# - cols: Number of columns in layout
	# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
	#
	# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
	# then plot 1 will go in the upper left, 2 will go in the upper right, and
	# 3 will go all the way across the bottom.
	#
	multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
	library(grid)

	# Make a list from the ... arguments and plotlist
	plots <- c(list(...), plotlist)

	numPlots = length(plots)

	# If layout is NULL, then use 'cols' to determine layout
	if (is.null(layout)) {
	# Make the panel
	# ncol: Number of columns of plots
	# nrow: Number of rows needed, calculated from # of cols
	layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
	ncol = cols, nrow = ceiling(numPlots/cols))
	}

	if (numPlots==1) {
	print(plots[[1]])

	} else {
	# Set up the page
	grid.newpage()
	pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

	# Make each plot, in the correct location
	for (i in 1:numPlots) {
	# Get the i,j matrix positions of the regions that contain this subplot
	matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

	print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
	layout.pos.col = matchidx$col))
	}
	}
	}

	#####Dados obtidos pelo Prof. Marcelo Hermes de Lima de revistas de Historia no JCR#######
	#Obtidos da SciMago, relativos a países/territorios selecionados para análise no triênio 2015-2018, conforme listados
	#Abaixo temos "citable papers", "cited papers", "% citado relativo ao mais citado",
	#"Citations Per Paper" e "cited" como total de citações recebidas.

	citable<-data.frame(
	UK =c(193, 124, 103, 117, 197),
	USA=c(547, 171, 79, 131, 125),
	BRA=c(51, 126, 128, 136, 94)
	)

	cited<-data.frame(
	UK =c(155, 92, 69, 77, 198),
	USA=c(468, 131, 57, 95, 93),
	BRA=c(16, 13, 6, 10, 9)
	)

	cited_perc<-data.frame(
	UK =c(80.3, 74.2, 67, 65.8, 100),
	USA=c(85.6, 76.6, 72.2, 72.5, 74.4),
	BRA=c(31.4, 2.4, 4.7, 7.4, 9.6)
	)

	CpP<-data.frame(
	UK =c(3.42, 3.613, 1.971, 1.889, 3.964),
	USA=c(3.347, 2.292, 2.696, 2.122, 2.792),
	BRA=c(0.471, 0.103, 0.07, 0.074, 0.149)
	)

	cites<-data.frame(
	UK =c(660, 448, 203, 221, 781),
	USA=c(1831, 392, 213, 278, 349),
	BRA=c(24, 13, 9, 10, 14)
	)

	Int_Col<-data.frame(
	UK =c(16.4, 13.3, 47.1, 21.2, 23.4),
	USA=c(56.6, 40, 17.4, 16.1, 21.1),
	BRA=c(0, 2.9, 8.9, 17, 13.2)
	)

	perc_prim<-data.frame(
	Por=c(32.4, 51.2, 59.9, 47.1),
	Arg=c(47.1, 30.7, 32.3, 37.1),
	Bra=c(17.6, 15, 16.8, 20.6)
	)

	#Série temporal de % de citações recebidas pelas revistas brasileiras selecionadas
	#relativo ao mais citado no período

	Temporal1<-data.frame(
	Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018),
	Perc_prim=c(30, 25.9, 34, 18, 18, 23.1, 27.5, 20.6, 16.8, 15, 17.6),
	rank_score=c(0.8, 1.2, 1.8, 1.1, 0.9, 0.6, 1.6, 0.8, 1.2, 0.2, 1.2)
	)

	#Brasil x Argentina em Historia
	#Numeros de artigos
	Brasil_e_Argentina<-data.frame(
	Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018),
	BR=c(88, 156, 196, 227, 329, 349, 399, 413, 463, 469, 423),
	AR=c(43, 64, 72, 106, 133, 146, 188, 200, 265, 295, 339)
	)

	#Relação de numero de artigos x impacto relativo (% CpP 1o lugar) do Brasil
	Papers_vs_impacto<-data.frame(
	Ano=c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018),
	Numero=c(88, 156, 196, 227, 329, 349, 399, 413, 463, 469, 423),
	Perc_Primeiro=c(30, 25.9, 34, 18, 18, 23.1, 27.5, 20.6, 16.8, 15, 17.6)
	)

	################### Plots dos Dados conforme estética definida pelo MHL ##################

	#Formato em Barplots para o artigo de divulgação (Maioria dos Leitores ainda nao entende formas mais claras de apresentar os dados)
	#CPP - Citations per Paper
	CPP<-ggplot(melt(CpP),
	aes(x=variable,
	y=value,
	fill=variable))+
	geom_bar(position = 'dodge',
	colour = 'white',
	stat = 'summary',
	alpha = 0.8,
	fun.y = 'mean',
	width = 0.7) +
	geom_point(aes(x = variable),
	colour = 'black',
	shape = 19,
	position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
	size = 1.5)+
	scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+
	scale_y_continuous(name= NULL, limits=c(0,5), breaks=c(0,1,2,3,4,5), expand = c(0, 0))+
	labs(title = "CPP", font="bold", size=9)+
	scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+
	theme_light()+
	theme(
	legend.position = "none",
	plot.title = element_text(color="red", size=16, face="bold.italic"),
	axis.title.x = element_text(size=20, face="bold", ,color="white"),
	axis.title.y = element_text(size=14, face="bold"),
	axis.text.x=element_text(size=14),
	axis.text.y=element_text(size=12))

	#%Cited - percentual relativo ao mais citado
	Cited<-ggplot(melt(cited_perc),
	aes(x=variable,
	y=value,
	fill=variable))+
	geom_bar(position = 'dodge',
	colour = 'white',
	stat = 'summary',
	alpha = 0.8,
	fun.y = 'mean',
	width = 0.7) +
	geom_point(aes(x = variable),
	colour = 'black',
	shape = 19,
	position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
	size = 1.5)+
	scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+
	scale_y_continuous(name= NULL, limits=c(0,100), breaks=c(0,25,50,75,100), expand = c(0, 0))+
	labs(title = "% artigos citados", font="% cited", size=9)+
	scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+
	theme_light()+
	theme(
	legend.position = "none",
	plot.title = element_text(color="red", size=16, face="bold.italic"),
	axis.title.x = element_text(size=20, face="bold", ,color="white"),
	axis.title.y = element_text(size=14, face="bold"),
	axis.text.x=element_text(size=14),
	axis.text.y=element_text(size=12))

	#Colaboração Internacional - medido por ...? [Note to self: Perguntar ao MHL e/ou Volpato]
	Colab<-ggplot(melt(Int_Col),
	aes(x=variable,
	y=value,
	fill=variable))+
	geom_bar(position = 'dodge',
	colour = 'white',
	stat = 'summary',
	alpha = 0.8,
	fun.y = 'mean',
	width = 0.7) +
	geom_point(aes(x = variable),
	colour = 'black',
	shape = 19,
	position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
	size = 1.5)+
	scale_x_discrete(name= '\nPaíses', labels = c("UK","EUA", "BR"), expand = c(0.1, 0))+
	scale_y_continuous(name= NULL, limits=c(0,60), breaks=c(0,15,30,45,60), expand = c(0.0, 0))+
	labs(title = "Internacionalização", font="bold", size=9)+
	scale_fill_manual(values=c("#001F7E", "#D00C27", "#009C3B"))+
	theme_light()+
	theme(
	legend.position = "none",
	plot.title = element_text(color="red", size=16, face="bold.italic"),
	axis.title.x = element_text(size=20, face="bold", ,color="white"),
	axis.title.y = element_text(size=14, face="bold"),
	axis.text.x=element_text(size=14),
	axis.text.y=element_text(size=12))

	#Plotando os acima em conjunto usando script descrito no início
	multiplot(CPP, Cited, Colab, cols=3)

	#Barplot % do primeiro lugar
	barplot<-ggplot(melt(perc_prim),
	aes(x=variable,
	y=value,
	fill=variable))+
	stat_summary(fun.y = mean, geom = "bar", size=1, col="black", width=0.5)+
	stat_summary(fun.ymin = function(x) mean(x) - 0,
	fun.ymax = function(x) mean(x) + se(x),
	geom = "errorbar", size=1, width=0.2)+
	geom_point(aes(x = variable),
	colour = 'black',
	shape = 19,
	position = position_jitterdodge(jitter.width = 0.3, dodge.width=0.5),
	size = 3)+
	scale_x_discrete(name= '\nPaíses', labels = c("Portugal", "Argentina", "Brasil"), expand = c(0.1, 0))+
	scale_y_continuous(name= NULL, limits=c(0,100), breaks=c(0,25,50,75,100), expand = c(0.0, 0))+
	labs(title = "% do CPP do Primeiro Lugar (2015-2018)", font="bold", size=9)+
	scale_fill_manual(values=c("#ff0000", "#75aadb", "#009C3B"))+
	theme_light()+
	theme(
	legend.position = "none",
	plot.title = element_text(color="red", size=16, face="bold.italic"),
	axis.title.x = element_blank(),
	axis.title.y = element_text(size=14, face="bold"),
	axis.text.x=element_text(size=14),
	axis.text.y=element_text(size=12))


	#Plots de nuvem de pontos para regressão
	#% do CPP primeiro colocado
	Perc_prim<-ggplot(Temporal1[,1:2], aes(y=Perc_prim, x=Ano))+
	geom_smooth(method = "loess", alpha = 0.05, size = 1, span = 1) +
	geom_point(size = 3.5)+
	scale_x_continuous(name= NULL, limits=c(2007,2019), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0.2))+
	scale_y_continuous(name= NULL, limits=c(0,50), breaks=c(0,15,30,45), expand = c(0, 0))+
	theme_light()+
	labs(title = "% do CPP do Primeiro Lugar", font="bold", size=16)+
	labs(caption="\n* Escala do eixo y cortada em 50% para enfatizar tendência da curva", size=9)+
	theme(
	legend.position = "none",
	plot.title = element_text(color="red", size=16, face="bold.italic"),
	axis.title.x = element_text(size=20, face="bold", ,color="white"),
	axis.title.y = element_text(size=14, face="bold"),
	axis.text.x=element_text(size=14),
	axis.text.y=element_text(size=12))

	#rank-score
	Rank<-ggplot(Temporal1[,c(1,3)], aes(y=rank_score, x=Ano))+
	geom_smooth(method='lm',formula=y~x, se=FALSE)+
	geom_point(size = 3.5)+
	scale_x_continuous(name= NULL, limits=c(2007.5,2018.5), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0))+
	scale_y_continuous(name= NULL, limits=c(0,10), breaks=c(2,4,6,8,10), expand = c(0, 0))+
	theme_light()+
	labs(title = "Rank Score", font="bold", size=16)+
	theme(
	legend.position = "none",
	plot.title = element_text(color="red", size=16, face="bold.italic"),
	axis.title.x = element_text(size=20, face="bold", ,color="white"),
	axis.title.y = element_text(size=14, face="bold"),
	axis.text.x=element_text(size=14),
	axis.text.y=element_text(size=12))

	#Plotando em conjunto
	multiplot(Rank, Perc_prim, cols=2)

	#Relacionando Brasil e Argentina
	Brasil_Argentina<-ggplot(Brasil_e_Argentina)+
	geom_point(aes(y=BR, x=Ano), colour="#009C3B", size = 3)+
	geom_smooth(aes(y=BR, x=Ano), method = "loess", se = FALSE, size = 1, span = 1, colour="#009C3B") +
	geom_point(aes(y=AR, x=Ano), colour="#75aadb", size = 3)+
	geom_smooth(aes(y=AR, x=Ano), method = "loess", se = FALSE, size = 1, span = 1, colour="#75aadb") +
	scale_x_continuous(name= NULL, limits=c(2007,2019), breaks=c(2008, 2010, 2012, 2014, 2016, 2018), expand = c(0, 0.2))+
	scale_y_continuous(name= NULL, limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+
	theme_light()+
	labs(title = "Número de artigos publicados", font="bold", size=16)+
	theme(
	legend.position = "bottom",
	plot.title = element_text(color="red", size=16, face="bold.italic"),
	axis.title.x = element_text(size=20, face="bold", ,color="white"),
	axis.title.y = element_text(size=14, face="bold"),
	axis.text.x=element_text(size=14),
	axis.text.y=element_text(size=12))

	#Plotando em conjunto conforme script descrito no início
	multiplot(barplot, Brasil_Argentina, cols=2)

	# Figura final relacionando numero de papers e impacto por citations
	#Dificuldade encontrada em automaticamente inserir a formula de regressao
	#Primeira tentativa usando plot automatico de parametros de regressao:
	#Solucão parcial obtida em https://stackoverflow.com/questions/7549694/add-regression-line-equation-and-r2-on-graph

	lm_eqn <- function(df){
	m <- lm(y ~ x, df);
	eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
	list(a = format(unname(coef(m)[1]), digits = 2),
	b = format(unname(coef(m)[2]), digits = 2),
	r2 = format(summary(m)$r.squared, digits = 3)))
	as.character(as.expression(eq));
	}

	#Vide que codigo fornecido nao funciona bem, pois exige um formato especifico de dataframe e resulta em slope invertido
	#[Quem souber corrigir, por favor edite o script]
	#Renomeando dados para encaixar formato na formula acima
	df<-Papers_vs_impacto
	names(df)<-c("Ano", "x", "y")

	#Plotando de acordo com primeira tentativa
	ggplot(df, aes(y=y, x=x))+
	geom_point(size=3)+
	geom_smooth(method = "lm", se = FALSE)+
	geom_text(x = 350, y = 7, label = lm_eqn(df), parse = TRUE)+
	scale_x_continuous(name= NULL, limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+
	scale_y_continuous(name= NULL, limits=c(0,50), breaks=c(0,15,30,45), expand = c(0, 0))+
	theme_light()+
	labs(title = "Papers (x) versus Impacto (Y)", font="bold", size=16)+
	theme(
	legend.position = "none",
	plot.title = element_text(color="red", size=16, face="bold.italic"),
	axis.title.x = element_text(size=20, face="bold", ,color="white"),
	axis.title.y = element_text(size=14, face="bold"),
	axis.text.x=element_text(size=14),
	axis.text.y=element_text(size=12))

	#Notar output estranho acima. Ajustado esteticamente abaixo como "gambiarra". Fico no aguardo de sugestoes.
	#Parametros conforme automaticamente providos pelo script acima:
	lb1<-paste("R^2 == ", 0.463)
	lb2<-"y = - 0.032x + 33"

	#Plotando manualmente com estes parametros
	ggplot(Papers_vs_impacto, aes(y=Perc_Primeiro, x=Numero))+
	geom_point(size=3)+
	geom_smooth(method = "lm", se = FALSE)+
	annotate("text", x = 350, y = 4, label =lb1, fontface=3, parse=TRUE)+
	annotate("text", x = 350, y = 7, label = lb2)+
	scale_x_continuous(limits=c(0,500), breaks=c(0,150,300,450), expand = c(0, 0))+
	scale_y_continuous(limits=c(0,50), breaks=c(15,30,45), expand = c(0, 0))+
	theme_light()+
	labs(title = "Correlação entre Quantidade vs. Impacto", y = "% do CPP do Primeiro Lugar", x = "Número de artigos publicados")+
	theme(
	legend.position = "none",
	plot.title = element_text(color="red", size=16, face="bold.italic"),
	axis.title.x = element_text(size=12, face="bold"),
	axis.title.y = element_text(size=13, face="bold"),
	axis.text.x=element_text(size=14),
	axis.text.y=element_text(size=12))

	###################################################################################################################################