Skip to content

Instantly share code, notes, and snippets.

@eduardofox2
Last active January 18, 2020 19:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eduardofox2/81946bcb5103715daa9f3add2d36a97c to your computer and use it in GitHub Desktop.
Save eduardofox2/81946bcb5103715daa9f3add2d36a97c to your computer and use it in GitHub Desktop.
R Script for a news press release in Brazil commenting on recent scientific output, focusing here on China vs. Brazil while commenting on the US as an outgroup. [IN PORTUGUESE]
##############Script escrito por Eduardo G P Fox em 10/12/2019 on Ri 386 Version 3.6.2 (Windows 10)###################
#Relativo a um artigo enfatizando o impacto da ciência chinesa em diversas subáreas, comparando ao Brasil e EUA
#Publicado em: https://olivre.com.br/a-incrivel-revolucao-da-ciencia-da-china
#idealizado pelo Prof. Marcelo Hermes de Lima (UnB) e Eduardo G P Fox (aspirante ao Butantan)
#Por favor enviem comentarios sobre este script para meu site pessoal
####(https://ofoxofox.wixsite.com/research) e/ou Twitter @ofoxofox######
#Pacotes R utilizados
library(ggplot2)
library(reshape2)
library(cowplot)
library(magick)
library(ggsci)
library(scales)
library(plyr)
#Caso faltar no GUI local, utilizar install.packages() para rodar o script abaixo
# If Portuguese characters are not displayed correctly, the locale has to be
# changed at the operating-system level.
Sys.setlocale(category = "LC_ALL", locale = "pt_BR.UTF-8")
# Helper used throughout this script.
# Builds a list from its arguments and names every element after the
# expression that was written in the call, so that namedList(a, b) returns
# list(a = a, b = b).
#
# Arguments:
#   ...  objects to collect.
# Returns:
#   A list holding the objects; names are the argument expressions as text.
#   With no arguments an empty, unnamed list is returned.
namedList <- function(...) {
  out <- list(...)
  # Nothing to name: seq(length(out)) would count 1, 0 here and fail.
  if (length(out) == 0L) {
    return(out)
  }
  arg_exprs <- as.list(sys.call())[-1L][seq_along(out)]
  names(out) <- vapply(
    arg_exprs,
    function(e) {
      # Symbols keep their bare name; any other expression is deparsed so a
      # single string is always produced.
      if (is.symbol(e)) as.character(e) else paste(deparse(e), collapse = " ")
    },
    character(1),
    USE.NAMES = FALSE
  )
  out
}
# Data freely available online, for anyone who wants to check the sources:
# https://www.scimagojr.com/countryrank.php
# Overall number of articles per year for Brazil (BR), China (CH) and the
# USA (US), used as the baseline for the comparison.
Numeros <- local({
  anos <- as.numeric(1996:2018)
  brasil <- c(
    9169, 11273, 12748, 13783, 15521, 16562, 19212, 21367,
    24053, 27268, 34401, 37257, 42608, 47276, 50974, 55487,
    61422, 64711, 68328, 70267, 74624, 78299, 81742
  )
  china <- c(
    30758, 36191, 42543, 43587, 51868, 66019, 68430, 81906,
    117430, 171336, 198131, 225040, 262645, 309103, 344420, 395431,
    415776, 456895, 491686, 461547, 496397, 534879, 599386
  )
  eua <- c(
    357538, 358351, 355856, 354399, 369874, 372843, 396004, 431318,
    465164, 514377, 529509, 531652, 539449, 576717, 600345, 621884,
    659053, 665898, 664528, 669588, 669204, 683590, 683003
  )
  data.frame(Anos = anos, BR = brasil, CH = china, US = eua)
})
# Rank scores per year for China and Brazil (no Brazilian value for 2018).
Rank_scores <- local({
  anos <- as.numeric(1996:2018)
  china <- c(
    0.3, 0.25, 0.25, 0.5, 0.5, 0.5, 0.7, 1.1, 0.7, 0.4, 0.8, 1,
    1.1, 1.1, 0.7, 1.2, 2.1, 3.3, 3.2, 4.3, 4.2, 5.1, 4.8
  )
  brasil <- c(
    2.37, 2.37, 2.63, 2.75, 2.25, 2.93, 2.83, 2.89, 3.96, 3.27, 3.33, 3.64,
    3.64, 2.91, 3.45, 2.62, 2.95, 1.94, 1.59, 1.64, 1.71, 1.37, NA
  )
  data.frame(Anos = anos, China = china, Brasil = brasil)
})
# Biotechnology rank scores for the USA, Brazil and China in two trienniums.
Trienio1 <- c("2008", "2009", "2010")
Trienio2 <- c("2016", "2017", "2018")
# One data frame per country; values are entered as character strings.
# stringsAsFactors = FALSE keeps them as character on every R version, so the
# later as.numeric() conversion cannot pick up factor codes.
Biotec.EUA <- data.frame(
  Triênio.1 = c("7.8", "6.9", "7.3"),
  Triênio.2 = c("8.5", "6.8", "8.4"),
  stringsAsFactors = FALSE
)
Biotec.Brasil <- data.frame(
  Triênio.1 = c("1.7", "3", "2.9"),
  Triênio.2 = c("3.5", "2.9", "2.65"),
  stringsAsFactors = FALSE
)
Biotec.China <- data.frame(
  Triênio.1 = c("0.5", "1.3", "3.3"),
  Triênio.2 = c("5", "5.6", "4.5"),
  stringsAsFactors = FALSE
)
# Gather the data frames and reshape to long format for plotting.
Biotecs <- namedList(Biotec.EUA, Biotec.Brasil, Biotec.China)
# Stack the per-country frames, tagging each row with the list name in a
# "Pais" column. Done with base R because dplyr (the home of bind_rows) is
# not among the packages loaded by this script.
Biotecs.df <- do.call(
  rbind,
  unname(Map(
    function(pais, dados) {
      data.frame(Pais = pais, dados,
                 stringsAsFactors = FALSE, check.names = FALSE)
    },
    names(Biotecs), Biotecs
  ))
)
rownames(Biotecs.df) <- NULL
Biotecs.long <- melt(data = Biotecs.df, id.vars = "Pais")
# Rank scores of Chinese research areas; the author chose to enter the values
# as character strings. Each area becomes a 3 x 2 numeric matrix: first
# column = first triennium, second column = second triennium.
Quimica <- cbind(
  Qu1 = as.numeric(c("3.8", "3.6", "3.8")),
  Qu2 = as.numeric(c("5.4", "7.3", "6.3"))
)
Geologia <- cbind(
  Ge1 = as.numeric(c("1.1", "0.5", "0.8")),
  Ge2 = as.numeric(c("1.6", "2.2", "1.8"))
)
Eng_Quim <- cbind(
  EQ1 = as.numeric(c("1.4", "2.6", "2.1")),
  EQ2 = as.numeric(c("6.2", "6.3", "6.6"))
)
Fisica <- cbind(
  Fs1 = as.numeric(c("1", "0.7", "1.2")),
  Fs2 = as.numeric(c("1.25", "1.25", "1.2"))
)
Materiais <- cbind(
  Ms1 = as.numeric(c("2", "1.7", "2.2")),
  Ms2 = as.numeric(c("5.1", "7.6", "5.7"))
)
Engenharia <- cbind(
  Eg1 = as.numeric(c("0.9", "0.6", "0.8")),
  Eg2 = as.numeric(c("3.9", "4.7", "4.6"))
)
Matematica <- cbind(
  Ma1 = as.numeric(c("0.6", "1.5", "1.4")),
  Ma2 = as.numeric(c("3.3", "3.7", "3.5"))
)
Ambiental <- cbind(
  Am1 = as.numeric(c("1.8", "1.5", "3.2")),
  Am2 = as.numeric(c("4.3", "4.9", "4"))
)
Computacao <- cbind(
  Co1 = as.numeric(c("0.8", "0.5", "0.6")),
  Co2 = as.numeric(c("4.6", "5.6", "5.3"))
)
# Note: the column names repeat those of Engenharia (Eg1/Eg2).
Energia <- cbind(
  Eg1 = as.numeric(c("1.4", "0.9", "0.8")),
  Eg2 = as.numeric(c("5.75", "7.2", "6"))
)
# Collect the area matrices in a list whose names equal the object names.
Areas <- list(
  Quimica    = Quimica,
  Geologia   = Geologia,
  Eng_Quim   = Eng_Quim,
  Fisica     = Fisica,
  Materiais  = Materiais,
  Engenharia = Engenharia,
  Matematica = Matematica,
  Ambiental  = Ambiental,
  Computacao = Computacao,
  Energia    = Energia
)
# Universities in the Leiden Ranking: per year, the rank position and the
# name of the university it belongs to, for Brazil and for China.
Rank_Leiden_BR <- data.frame(
  Anos = as.numeric(2009:2017),
  Leiden_BR = c(665, 690, 691, 654, 696, 706, 731, 679, 711),
  Uni_BR = factor(c(
    "UFSC", "UFSC", "UFSCar", "UFSC", "UFSC", "UFSC", "UFBA", "UFBA", "UFC"
  ))
)
Rank_Leiden_CH <- data.frame(
  Anos = as.numeric(2009:2017),
  Leiden_CH = c(202, 229, 315, 310, 225, 140, 149, 155, 79),
  Uni_CH = factor(c(
    "Donghua", "Xijian", "Donghua", "Nankai", "Wuhan", "Wuhan",
    "Fuzhou", "Fuzhou", "Hunan"
  ))
)
# China rank score paired with the Leiden rank position (7 observations).
df1 <- data.frame(
  China_rank = c(1.2, 2.1, 3.3, 3.2, 4.3, 4.2, 5.1),
  Leiden_CH = c(315, 310, 225, 140, 149, 155, 79)
)
# top_10_CH values paired with the same Leiden positions. The column is now
# created with `=` so it is really called top_10_CH (using `<-` inside
# data.frame() gave a mangled column name and leaked a global variable).
# China_rank is included because the Fig 8B plot maps it on the x axis.
df2 <- data.frame(
  top_10_CH = c(14.3, 12.8, 12.8, 12.9, 11.5, 10.4, 10.3),
  Leiden_CH = c(315, 310, 225, 140, 149, 155, 79),
  China_rank = c(1.2, 2.1, 3.3, 3.2, 4.3, 4.2, 5.1)
)
# Brazilian counterpart, kept as character strings (9 values).
top_10_BR <- c("6.3", "6.1", "6.3", "6.9", "6.4", "6.6", "6.4", "7.1", "7.1")
#Fim dos dados brutos
#################################*********
#Plotando as Figuras segundo a estética combinada com o primeiro autor MHL
#Imagens de icons obtidas a partir do website https://www.flaticon.com/packs/countrys-flags
#Icons made by <a href="https://www.flaticon.com/authors/freepik" title="Freepik">Freepik</a> from <a href="https://www.flaticon.com/" title="Flaticon">www.flaticon.com</a></div>
# Flag icons previously downloaded to the local computer.
# The folder can be changed without editing the script by setting the
# FLAG_ICON_DIR environment variable; when it is unset the author's original
# download folder is used.
icon_dir <- Sys.getenv("FLAG_ICON_DIR", unset = "C:\\Users\\Eduardo\\Downloads")
brazil_icon <- image_read(file.path(icon_dir, "brazil.png"))
china_icon <- image_read(file.path(icon_dir, "china.png"))
USA_icon <- image_read(file.path(icon_dir, "united-states.png"))
# Plot 1: articles published over the years by the three countries.
# Each country gets its points plus a dotted loess trend in its own colour.
Plot_Numero <- ggplot(data = Numeros) +
  geom_point(mapping = aes(x = Anos, y = BR), size = 3, colour = "#009C3B") +
  geom_smooth(
    mapping = aes(x = Anos, y = BR),
    method = "loess", span = 1, se = FALSE,
    size = 1, linetype = "dotted", colour = "#009C3B"
  ) +
  geom_point(mapping = aes(x = Anos, y = CH), size = 3, colour = "#ff0000") +
  geom_smooth(
    mapping = aes(x = Anos, y = CH),
    method = "loess", span = 1, se = FALSE,
    size = 1, linetype = "dotted", colour = "#ff0000"
  ) +
  geom_point(mapping = aes(x = Anos, y = US), size = 3, colour = "blue") +
  geom_smooth(
    mapping = aes(x = Anos, y = US),
    method = "loess", span = 1, se = FALSE,
    size = 1, linetype = "dotted", colour = "blue"
  ) +
  labs(title = "Número de artigos publicados", x = NULL) +
  scale_x_continuous(
    breaks = seq(1996, 2018, 3),
    limits = c(1996, 2018),
    expand = c(0.1, 0.1)
  ) +
  # Custom label function: Portuguese number formatting uses "." for
  # thousands and "," for decimals, unlike the English default.
  scale_y_continuous(
    expand = c(0.1, 0.1),
    labels = function(x) format(x, big.mark = ".", decimal.mark = ",", scientific = FALSE)
  ) +
  theme_light() +
  theme(
    plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold", color = "white"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12),
    legend.position = "bottom"
  )
# Overlay the country flag icons.
# NOTE: this assignment replaces the `Numeros` data frame with the finished
# drawing.
Numeros <- ggdraw(plot = Plot_Numero) +
  draw_image(USA_icon, x = -0.21, y = 0.12, scale = 0.09) +
  draw_image(china_icon, x = -0.21, y = -0.25, scale = 0.09) +
  draw_image(brazil_icon, x = 0.31, y = -0.36, scale = 0.09)
# Plot 2: compound rank scores.
# 2A: China only, x axis limited to 1996-2011 (before the big Chinese leap).
Plot2A <- ggplot(data = Rank_scores) +
  geom_point(mapping = aes(x = Anos, y = China), size = 3, colour = "#ff0000") +
  geom_smooth(
    mapping = aes(x = Anos, y = China),
    method = "loess", span = 1, se = FALSE,
    size = 1, linetype = "dotted", colour = "#ff0000"
  ) +
  labs(title = "Rank Scores da China antes do \"Salto\"", x = NULL) +
  scale_x_continuous(
    breaks = seq(1996, 2018, 3),
    limits = c(1996, 2011),
    expand = c(0.1, 0.1)
  ) +
  scale_y_continuous(
    breaks = seq(0, 2, 0.2),
    limits = c(0, 1.5),
    expand = c(0.1, 0.1)
  ) +
  theme_light() +
  theme(
    plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold", color = "white"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12),
    legend.position = "bottom"
  )
# 2B: the whole 1996-2018 period, Brazil and China together.
Plot2Ba <- ggplot(data = Rank_scores) +
  geom_point(mapping = aes(x = Anos, y = Brasil), size = 3, colour = "#009C3B") +
  geom_smooth(
    mapping = aes(x = Anos, y = Brasil),
    method = "loess", span = 1, se = FALSE,
    size = 1, linetype = "dotted", colour = "#009C3B"
  ) +
  geom_point(mapping = aes(x = Anos, y = China), size = 3, colour = "#ff0000") +
  geom_smooth(
    mapping = aes(x = Anos, y = China),
    method = "loess", span = 1, se = FALSE,
    size = 1, linetype = "dotted", colour = "#ff0000"
  ) +
  labs(title = "Rank Scores ao Longo dos Anos", x = NULL) +
  scale_x_continuous(
    breaks = seq(1996, 2018, 4),
    limits = c(1996, 2018),
    expand = c(0.05, 0.05)
  ) +
  scale_y_continuous(
    breaks = c(1, 2, 3, 4, 5),
    limits = c(0, 5.5),
    expand = c(0.05, 0.05)
  ) +
  theme_light() +
  theme(
    plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold", color = "white"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12),
    legend.position = "bottom"
  )
# Flag icons on top of panel 2B.
Plot2B <- ggdraw(plot = Plot2Ba) +
  draw_image(brazil_icon, x = -0.34, scale = 0.09) +
  draw_image(china_icon, x = -0.34, y = -0.25, scale = 0.09)
# Compose Figure 2 in its final layout.
plot_grid(Plot2A, Plot2B)
# Fig 3: boxplots of the 10 areas, comparing the two trienniums.

# Builds the boxplot panel for one research area.
#
# Arguments:
#   area        numeric matrix with one column per triennium; it is melted
#               with melt(), whose Var2 (column) and value fields are plotted.
#   label       text written inside the panel at y = 7.5.
#   label_x     x position of that text.
#   y_breaks    y-axis breaks; NULL draws no ticks or tick labels.
#   y_position  side of the y axis ("left" or "right").
#   x_labels    labels for the two boxes; NULL hides them.
# Returns:
#   A ggplot object.
area_boxplot <- function(area, label, label_x = 1.0, y_breaks = NULL,
                         y_position = "left", x_labels = NULL) {
  ggplot(melt(area), aes(x = Var2, y = value, fill = Var2)) +
    stat_boxplot(geom = "errorbar", width = 0.15, lwd = 0.5) +
    geom_boxplot(width = 0.7, lwd = 1, fatten = 0.4, outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.5) +
    scale_y_continuous(limits = c(0, 8), breaks = y_breaks,
                       expand = c(0.05, 0.5), position = y_position) +
    scale_x_discrete(name = NULL, labels = x_labels) +
    scale_fill_jco() +
    theme_light() +
    labs(title = NULL, x = NULL, y = NULL) +
    theme(plot.margin = margin(0, 0.02, 0, 0.02, "cm"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          # "none" is the documented value for hiding the legend.
          legend.position = "none") +
    annotate(geom = "text", x = label_x, y = 7.5, label = label,
             color = "red", size = 5.7)
}

y_ticks <- seq(0, 8, 2)
trienio_labels <- c("Triênio 1", "Triênio 2")

# Top row of the grid: no x labels; y ticks only on the outer panels.
p.Ambiental <- area_boxplot(Ambiental, "Ambiental", y_breaks = y_ticks)
# Plots the Computacao matrix (the Ambiental data had been pasted here).
p.Computacao <- area_boxplot(Computacao, "Computação", label_x = 1.2)
p.Energia <- area_boxplot(Energia, "Energia")
p.Eng_Quim <- area_boxplot(Eng_Quim, "Engenharia\nQuímica", label_x = 1.2)
p.Engenharia <- area_boxplot(Engenharia, "Engenharia", label_x = 1.2,
                             y_breaks = y_ticks, y_position = "right")
# Bottom row: triennium labels on the x axis.
p.Fisica <- area_boxplot(Fisica, "Física", y_breaks = y_ticks,
                         x_labels = trienio_labels)
p.Geologia <- area_boxplot(Geologia, "Geologia", x_labels = trienio_labels)
p.Matematica <- area_boxplot(Matematica, "Matemática", label_x = 1.2,
                             x_labels = trienio_labels)
p.Materiais <- area_boxplot(Materiais, "Materiais", x_labels = trienio_labels)
p.Quimica <- area_boxplot(Quimica, "Química", y_breaks = y_ticks,
                          y_position = "right", x_labels = trienio_labels)

# Multi-panel composition of the final figure. The panels are listed
# explicitly so the layout does not depend on which objects exist in the
# workspace or on how their names sort.
plot_grid(
  plotlist = list(
    p.Ambiental, p.Computacao, p.Energia, p.Eng_Quim, p.Engenharia,
    p.Fisica, p.Geologia, p.Matematica, p.Materiais, p.Quimica
  ),
  ncol = 5
)
# Fig 4: delta (difference) in rank score between the two periods for China.
# DELTA takes the mean of every column of x (computed as the row means of the
# transposed input) and returns the differences between consecutive means.
DELTA <- function(x) {
  transposed <- t(x)
  period_means <- rowMeans(transposed)
  diff(period_means)
}
# Arrange the DELTA values: one value per area.
Deltas <- laply(Areas, DELTA)
# The area column is a factor whose levels follow the order of `Areas`, so
# the bars are drawn in that order instead of alphabetically.
China_deltas <- data.frame(
  Deltas,
  names.Areas. = factor(names(Areas), levels = names(Areas))
)
# Axis labels keyed by area name: each bar gets the label of its own area
# regardless of the order in which the bars are drawn. To change the order of
# the bars, change the factor levels above.
area_axis_labels <- c(
  Quimica = "Química", Geologia = "Geologia",
  Eng_Quim = "Engenharia\nQuímica", Fisica = "Física",
  Materiais = "Materiais", Engenharia = "Engenharia",
  Matematica = "Matemática", Ambiental = "Ambiental",
  Computacao = "Computação", Energia = "Energia"
)
ggplot(China_deltas) +
  geom_bar(aes(x = names.Areas., y = Deltas, fill = names.Areas.),
           stat = "identity", width = 0.6) +
  scale_fill_jco() +
  scale_y_continuous(name = NULL, expand = c(0, 0.1)) +
  scale_x_discrete(name = NULL, labels = area_axis_labels,
                   expand = c(0.1, 0)) +
  theme_light() +
  labs(title = 'Diferenças no rank score de áreas de pesquisa chinesa entre dois triênios',
       fill = "Areas") +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "red", size = 15, face = "bold.italic"),
    axis.title.x = element_text(size = 20, face = "bold"),
    axis.title.y = element_text(size = 20, face = "bold"),
    axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12))
# Fig 5: rank score of Brazil in 2017 vs. the Chinese mean of the second
# triennium.
# DELTA2 returns the mean of the second column of x.
DELTA2 <- function(x) {
  second_column <- x[, 2]
  mean(second_column)
}
# Brazilian rank scores for 2017, one value per area.
BR_2017 <- data.frame(
  names(Areas),
  rank_score_BR = c(1.5, 3.5, 3.5, 2.4, 1, 1.5, 2, 3, 2.6, 2.3)
)
# Second-triennium mean of every Chinese area.
Deltas2 <- laply(Areas, DELTA2)
# The area column is a factor whose levels follow the order of `Areas`, so
# the bars are drawn in that order instead of alphabetically.
China_BR_deltas <- data.frame(
  China = Deltas2,
  Brasil = c(1.5, 3.5, 3.5, 2.4, 1, 1.5, 2, 3, 2.6, 2.3),
  names.Areas. = factor(names(Areas), levels = names(Areas))
)
# Axis labels keyed by area name, so each group of bars gets the label of its
# own area regardless of drawing order.
area_axis_labels <- c(
  Quimica = "Química", Geologia = "Geologia",
  Eng_Quim = "Engenharia\nQuímica", Fisica = "Física",
  Materiais = "Materiais", Engenharia = "Engenharia",
  Matematica = "Matemática", Ambiental = "Ambiental",
  Computacao = "Computação", Energia = "Energia"
)
# Image to publish (draft).
ggplot(melt(China_BR_deltas, id.vars = "names.Areas."),
       aes(x = names.Areas., y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6) +
  scale_fill_manual(values = c("#ff0000", "#009C3B")) +
  scale_x_discrete(name = 'Areas', expand = c(0.1, 0),
                   labels = area_axis_labels) +
  scale_y_continuous(name = NULL, limits = c(0, 8),
                     breaks = c(0, 2.5, 5.0, 7.5, 10.0),
                     expand = c(0, 0)) +
  labs(title = 'Rank-Scores da China 2016-2018 vs. Brasil 2017, em diferentes áreas',
       fill = "Areas") +
  theme_light() +
  theme(
    legend.position = c(0.1, 0.85),
    legend.title = element_blank(),
    plot.title = element_text(color = "red", size = 14, face = "bold.italic"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 20, face = "bold"),
    axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12))
# Fig 6: boxplots of the biotechnology rank scores of Brazil, China and the
# USA in the two trienniums.

# Builds the biotechnology boxplot for one country.
#
# Arguments:
#   country_key  value of Biotecs.long$Pais selecting the country rows.
#   label        text written inside the panel.
# Returns:
#   A ggplot object.
biotec_boxplot <- function(country_key, label) {
  country_rows <- Biotecs.long[Biotecs.long$Pais == country_key, ]
  # as.character() first: if `value` were a factor, as.numeric() alone would
  # return the level codes instead of the numbers.
  ggplot(country_rows,
         aes(x = variable, y = as.numeric(as.character(value)),
             fill = variable)) +
    geom_boxplot(width = 0.6, lwd = 1.2, fatten = 1, outlier.shape = NA) +
    scale_y_continuous(limits = c(0, 10), breaks = c(0, 2, 4, 6, 8),
                       expand = c(0.04, 0.04)) +
    geom_jitter(width = 0.1, alpha = 0.6) +
    scale_fill_jco() +
    theme_light() +
    labs(title = NULL, x = NULL) +
    theme(plot.margin = margin(0, 0.5, 0, 0, "cm"),
          # "none" is the documented value for hiding the legend.
          legend.position = "none",
          plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
          axis.title.x = element_text(size = 14, face = "bold"),
          # White title: the y-axis title is kept but invisible.
          axis.title.y = element_text(size = 14, face = "bold", color = "white"),
          axis.text.x = element_text(size = 14),
          axis.text.y = element_text(size = 12)) +
    scale_x_discrete(name = NULL, labels = c("Triênio 1", "Triênio 2"),
                     expand = c(0.2, 0.2)) +
    annotate(geom = "text", x = 1.6, y = 9.2, label = label,
             color = "red", size = 5)
}

Biot.Brasil <- biotec_boxplot("Biotec.Brasil", "Biotec Brasil")
PP.Brasil <- ggdraw(Biot.Brasil) +
  draw_image(brazil_icon, scale = 0.14, x = -0.25, y = 0.42)
Biot.China <- biotec_boxplot("Biotec.China", "Biotec China")
PP.China <- ggdraw(Biot.China) +
  draw_image(china_icon, scale = 0.14, x = -0.25, y = 0.42)
Biot.EUA <- biotec_boxplot("Biotec.EUA", "Biotec EUA")
PP.EUA <- ggdraw(Biot.EUA) +
  draw_image(USA_icon, scale = 0.14, x = -0.25, y = 0.42)
# Compose the final image. Panels are listed explicitly so no other
# workspace object can be picked up by a name pattern.
plot_grid(plotlist = list(PP.Brasil, PP.China, PP.EUA), nrow = 1)
# Fig 7: the university shown for each year in the Leiden ranking data, for
# China and Brazil.
# Chinese universities -- Leiden rank position per year.
Uni.CH <- ggplot(Rank_Leiden_CH, aes(x = Anos, y = Leiden_CH)) +
  geom_point(colour = "#ff0000", size = 3) +
  # Only the text layer uses the university name as label.
  geom_text(aes(label = Uni_CH), hjust = -0.1, vjust = -0.1) +
  geom_smooth(size = 1, colour = "#ff0000", linetype = "dashed", se = FALSE) +
  scale_y_continuous(limits = c(0, 400), breaks = seq(0, 400, 50),
                     expand = c(0.1, 0.1)) +
  scale_x_continuous(limits = c(2011, 2017), breaks = seq(2011, 2018, 2),
                     expand = c(0.1, 0.1)) +
  theme_light() +
  labs(title = "Ranking Leiden: Univ. chinesas", x = NULL) +
  theme(plot.margin = margin(0, 1, 0, 0, "cm"),
        # "none" is the documented value for hiding the legend.
        legend.position = "none",
        plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
        axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold", color = "white"),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 12))
# Brazilian universities -- Leiden rank position per year.
Uni.BR <- ggplot(Rank_Leiden_BR, aes(x = Anos, y = Leiden_BR)) +
  geom_point(colour = "#009C3B", size = 3) +
  geom_text(aes(label = Uni_BR), hjust = -0.1, vjust = -0.1) +
  geom_smooth(size = 1, colour = "#009C3B", linetype = "dashed", se = FALSE) +
  scale_y_continuous(limits = c(400, 750), breaks = seq(0, 750, 50),
                     expand = c(0.1, 0.1)) +
  scale_x_continuous(limits = c(2011, 2017), breaks = seq(2011, 2018, 2),
                     expand = c(0.1, 0.1)) +
  theme_light() +
  labs(title = "Ranking Leiden: Univ. brasileiras", x = NULL) +
  theme(plot.margin = margin(0, 1, 0, 0, "cm"),
        legend.position = "none",
        plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
        axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold", color = "white"),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 12))
# Compose the final image to publish. Panels are listed explicitly so no
# other workspace object can be picked up by a name pattern.
plot_grid(plotlist = list(Uni.BR, Uni.CH), nrow = 1)
# Fig 8 (composite): rank scores vs. Leiden position, and rank scores vs.
# impact.
# [The author reports difficulty placing the R value on the image and asks
# readers for suggestions.]
# The earlier attempt below is kept only for reference; it is disabled
# because `group` and `lm_eqn` are not defined anywhere in this script, so
# the call stopped the script with an error.
# ddply(df,.(group),lm_eqn)

# Fits a linear model and returns the fitted equation with its r-squared as
# a plotmath expression converted to a character string.
#
# Arguments:
#   df2      data frame holding the variables named in `formula`.
#   formula  model formula; defaults to Leiden_CH ~ China_rank.
# Returns:
#   A character string of the form "italic(y) == a + b %.% italic(x) ...".
lm_eqn2 <- function(df2, formula = Leiden_CH ~ China_rank) {
  m <- lm(formula, data = df2)
  eq <- substitute(
    italic(y) == a + b %.% italic(x) * "," ~ ~italic(r)^2 ~ "=" ~ r2,
    list(a = format(unname(coef(m)[1]), digits = 2),
         b = format(unname(coef(m)[2]), digits = 2),
         r2 = format(summary(m)$r.squared, digits = 3))
  )
  as.character(as.expression(eq))
}
# Fig 8A: China rank score (x) against the Leiden rank position (y), with a
# dashed linear fit.
Fig8A <- ggplot(df1, aes(x = China_rank, y = Leiden_CH)) +
  geom_point(colour = "black", size = 3) +
  geom_smooth(method = lm, size = 1, colour = "#ff0000", linetype = "dashed",
              se = FALSE) +
  scale_y_continuous(limits = c(0, 350), breaks = seq(0, 350, 50),
                     expand = c(0.1, 0.1)) +
  theme_light() +
  labs(title = "Rank Score vs. Ranking Leiden: Univ. chinesas", x = NULL) +
  theme(plot.margin = margin(0, 1, 0, 0, "cm"),
        # "none" is the documented value for hiding the legend.
        legend.position = "none",
        plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
        axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold", color = "white"),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 12)) +
  # The R^2 value is typed in by hand; it is not computed from the fit.
  annotate(geom = "text", x = 2, y = 50,
           label = expression(paste("R"^2, "=0.86")), size = 4)
# Fig 8B: China rank score (x) against top_10_CH (y), with a dashed linear
# fit. Requires df2 to provide both the China_rank and top_10_CH columns.
Fig8B <- ggplot(df2, aes(x = China_rank, y = top_10_CH)) +
  geom_point(colour = "black", size = 3) +
  geom_smooth(method = lm, size = 1, colour = "#ff0000", linetype = "dashed",
              se = FALSE) +
  scale_y_continuous(limits = c(0, 15), breaks = seq(0, 15, 5),
                     expand = c(0.1, 0.1)) +
  theme_light() +
  labs(title = "Rank Score vs. top.10%: Univ. chinesas", x = NULL) +
  theme(plot.margin = margin(0, 1, 0, 0, "cm"),
        # "none" is the documented value for hiding the legend.
        legend.position = "none",
        plot.title = element_text(color = "red", size = 16, face = "bold.italic"),
        axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold", color = "white"),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 12)) +
  # The R^2 value is typed in by hand; it is not computed from the fit.
  annotate(geom = "text", x = 2, y = 2.5,
           label = expression(paste("R"^2, "=0.8463")), size = 4)
# Compose the final Figure 8.
plot_grid(Fig8A, Fig8B)
########################################################################################
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment