Skip to content

Instantly share code, notes, and snippets.

@rrodrigueznt
Last active December 21, 2019 06:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rrodrigueznt/0312c2bc3015f990ba93e3fea434e86c to your computer and use it in GitHub Desktop.
Save rrodrigueznt/0312c2bc3015f990ba93e3fea434e86c to your computer and use it in GitHub Desktop.
#
library(readxl)
library(openxlsx) # extremelly slow, but it guess much better column types for locally edited
library(dplyr, warn.conflicts = FALSE)
library(stringr)
library(lubridate, warn.conflicts = FALSE)
library(ggplot2)
#
setwd("C:/Users/Ricardo Rodríguez/Universidade de Santiago de Compostela/IGFAE-Metrics - Documents/DataReceivedFromUSC/")
#
setwd("/Users/rrodriguez/Universidade de Santiago de Compostela/igfae-metrics---documents/DataReceivedFromUSC/")
#
## The difference between income and expenses is simple: income is the money your business takes in and expenses are what it spends money on. Your net income is generally your revenue, or all the money coming into your business, minus all of your expenses.
## https://smallbusiness.chron.com/difference-between-income-expense-26088.html
#
readxl::excel_sheets("2019-03-05_Dotacions_IPs_IGFAE_aData.xlsx")
#
IGFAEincome_20190305 <- openxlsx::read.xlsx("2019-03-05_Dotacions_IPs_IGFAE_aData.xlsx", sheet = "IPs IGFAE", detectDates = TRUE)
IGFAEincome_20190320 <- openxlsx::read.xlsx("2019-03-20_Dotacions_IPs_IGFAE_aData.xlsx", sheet = "IPs IGFAE", detectDates = TRUE)
IGFAEincome_20190328 <- openxlsx::read.xlsx("2019-03-28_Dotacions_IPs_IGFAE_aData.xlsx", sheet = "IPs IGFAE", detectDates = TRUE)
#
readxl::excel_sheets("Staff_IGFAE_1999-2018_FINAL.xlsx")
#
IGFAEincomeHc <- openxlsx::read.xlsx("Staff_IGFAE_1999-2018_FINAL.xlsx", sheet = "RRHH1999-2018", detectDates = TRUE)
IGFAEexpensesHc <- openxlsx::read.xlsx("Staff_IGFAE_1999-2018_FINAL.xlsx", sheet = "Contratados", detectDates = TRUE, )
names(IGFAEexpensesHc) <- c('Rexistro','Titulo','TotalIngresos','IP','NIFip','NomeContratado','NIFcon','Categoría','TipoContrato','Prorroga','DtaDesde','DtaAta','DtaBaixa')
#
IGFAEincomeHcSs01 <- IGFAEincomeHc %>% dplyr::filter(stringr::str_detect(TipoContrato, "Cajal|Parga")) %>% dplyr::mutate(DtaFinal = lubridate::year(DtaFinal), DtaInicial = lubridate::year(DtaInicial), Who = stringr::str_extract(NomeInvestigador, "^.{15}")) %>% dplyr::mutate(Titulo=replace(Titulo, DocumentoId=='033332678C', 'RAMÓN Y CAJAL 2017')) %>% dplyr::select('TipoContrato','DocumentoId','Who','Titulo') %>% dplyr::arrange(TipoContrato, DocumentoId) %>% dplyr::distinct() %>% dplyr::filter(!stringr::str_detect(Who, "Casarejos|Ave Pernas")) %>% dplyr::mutate(CallYear = as.numeric(stringr::str_extract_all(Titulo, "[0-9]+"))) %>% dplyr::mutate(CallTitle=dplyr::case_when(stringr::str_detect(TipoContrato, "Cajal") ~ 'Ramón y Cajal' , stringr::str_detect(TipoContrato, "Parga") ~ 'Parga Pondal')) %>% dplyr::add_row(TipoContrato='Contratados Ramón y Cajal' , DocumentoId='044840037L' , Who='Cid Vidal, Xabi' , Titulo='Programa RAMÓN Y CAJAL 2016' , CallTitle='Ramón y Cajal' , CallYear=2016)
#
IGFAEincomeHcSs02 <- IGFAEincomeHcSs01 %>% dplyr::select('CallYear','CallTitle' ) %>% dplyr::add_row(CallYear=2002) %>% dplyr::add_row(CallYear=2008) %>% dplyr::add_row(CallYear=2009) %>% dplyr::add_row(CallYear=2011) %>% dplyr::add_row(CallYear=2013) %>% dplyr::add_row(CallYear=2014) %>% dplyr::add_row(CallYear=2015)
#
g01 <- ggplot(IGFAEincomeHcSs02, aes(as.character(CallYear),fill = CallTitle))
g01 + geom_bar(na.rm = FALSE) +
scale_fill_manual(values=c("Parga Pondal"="slategray2", "Ramón y Cajal"="steelblue2"), guide="none") +
xlab("\ncall") +
ylab("n ") +
theme(plot.title = element_text(size=14, face = "bold"),
legend.title=element_text(size=12),
legend.text=element_text(size=12),
axis.text=element_text(size=12),
axis.title.x=element_text(size=14, face="bold"),
axis.title.y=element_text(size=14, face="bold", angle = 0)
)
#
## Be carefull with the concepts behind gem_histogram() and scale_x_continuous; the scale breaks allows to split the x continuous axis
## but this is not what we want! See g01 above!
## https://stackoverflow.com/questions/14138247/ggplot-geom-bar-vs-geom-histogram
## g02 <- ggplot(IGFAEincomeHcSs02, aes(CallYear,fill = CallTitle))
## g02 + geom_histogram() + scale_x_continuous(breaks = c(2001:2017)) + labs(x = NULL, y = NULL)
#
IGFAEincome6060 <- IGFAEincome_20190305 %>% dplyr::filter(Organica == 6060) %>% dplyr::select('Organica','Funcional','Economica','Anualidade','Importe') %>% dplyr::group_by(Funcional) %>% dplyr::filter(Funcional == 'G1KE')
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment