sillasgonzaga/api_quotas_fundos.R

## api_quotas_fundos.R
library(httr)
library(magrittr)
library(xml2)
library(rvest)

# Directory listing with the daily-report ("informe diario") CSV files and the
# local path where the selected file is stored.
url_raw <- "http://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/"
output_file <- "fundos/api/informe_diario.csv"

# Collect the target of every hyperlink in the listing. Fail early on an HTTP
# error instead of scraping an error page.
csv_links <- httr::GET(url_raw) %>%
  httr::stop_for_status() %>%
  httr::content("text") %>%
  xml2::read_html() %>%
  rvest::html_nodes("a") %>%
  rvest::html_attr("href")

# Keep only links that end in ".csv". The former pattern "*.csv" was a shell
# glob rather than a regular expression: its unescaped "." matched any
# character and it was not anchored to the end of the name.
csv_links <- csv_links[grepl(pattern = "\\.csv$", x = csv_links)]
if (length(csv_links) == 0) {
  # without this guard paste0() below would silently yield the bare listing URL
  stop("No CSV links found at ", url_raw, call. = FALSE)
}

# The last link of the listing is the file that gets downloaded.
ultimo_csv <- csv_links[length(csv_links)]

download_link <- paste0(url_raw, ultimo_csv)

download.file(download_link, destfile = output_file,
              method = "wget")

### api fundos
library(plumber)

# Import the daily-report file into the global data frame `df` that the API
# endpoint reads from.
ultimo_inf_diario <- "fundos/api/informe_diario.csv"

df <- read.csv2(
  file = ultimo_inf_diario,
  stringsAsFactors = FALSE
)

# Return the quota value of the last row of the global `df` whose CNPJ_FUNDO
# equals `cnpj_busca`.
#
# cnpj_busca: value compared against df$CNPJ_FUNDO.
# Returns the last matching VL_QUOTA converted to numeric; the result has
# length zero when no row matches.
#* @post /valor_quota
retornar_quota <- function(cnpj_busca){
  linhas_do_fundo <- df$CNPJ_FUNDO == cnpj_busca
  quotas <- as.numeric(df$VL_QUOTA[linhas_do_fundo])
  # only the last match, in file order, is reported
  quotas[length(quotas)]
}


## cadastro_fundos.R

#"http://dados.cvm.gov.br/dados/FI/CAD/DADOS/inf_cadastral_fi_20180906.csv"


# Download the fund registry ("cadastro") file listed on the CVM open-data
# directory and read it.
#
# download_path: destination of the downloaded file, forwarded to
#   download_cvm_file() (NULL is forwarded unchanged).
# verbose: forwarded to download_cvm_file().
# Returns the result of readr::read_delim() on the downloaded file
# (";"-delimited, ISO-8859-1 encoded).
download_cadastro_fundos <- function(download_path = NULL, verbose = TRUE){
  # do web scraping to get the last filename
  base_url <- "http://dados.cvm.gov.br/dados/FI/CAD/DADOS/"
  listing <- rvest::html_table(xml2::read_html(base_url))[[1]]
  # File name sits in the second column of the second-to-last row.
  # `[[` returns a plain vector whether html_table() gives a data.frame or a
  # tibble, so paste0() below always receives a string.
  filename <- listing[[2]][nrow(listing) - 1]

  url_cadastro <- paste0(base_url, filename)
  # save downloaded file on a temporary file
  file <- download_cvm_file(csv_url = url_cadastro, download_path, verbose)
  # read the downloaded file; locale() must be namespace-qualified because
  # readr is never attached with library()
  readr::read_delim(file,
                    delim = ";",
                    locale = readr::locale(encoding = "ISO-8859-1"))
}


## emissores.R
#### Emissores
# Download and tidy the aggregated daily-report spreadsheet (Fundos 555, total).
#
# download_path: destination of the downloaded file, forwarded to
#   download_cvm_file().
# verbose: forwarded to download_cvm_file() and read_cvm_file().
# Returns the sheet without its first two columns, with cleaned column names,
# the first remaining column renamed "data" and parsed as day-month-year, and
# every other column converted by parse_num_ods_col().
emissores_informe_diario <- function(download_path = NULL, verbose = TRUE){
  # save downloaded file on a temporary file
  csv_url <- "http://www.cvm.gov.br/menu/acesso_informacao/serieshistoricas/serieshistoricas/anexos/Informe_diario__Planilha_Agregada__Fundos_555__Total.ods"
  file <- download_cvm_file(csv_url, download_path, verbose)

  ## read downloaded file
  x <- read_cvm_file(file, verbose = verbose)
  # remove invalid columns
  x[1:2] <- NULL
  # clean colnames
  x <- janitor::clean_names(x)
  # rename date column
  colnames(x)[1] <- "data"

  ## convert column types
  # date column; `[[` yields the column vector for data.frames and tibbles alike
  x[[1]] <- lubridate::dmy(x[[1]])

  # numeric columns; seq_len(...)[-1] is empty when only the date column
  # exists, whereas 2:ncol(x) would count down to c(2, 1)
  for (i in seq_len(ncol(x))[-1]){
    x[[i]] <- parse_num_ods_col(x[[i]])
  }

  x
}

# Download and tidy the "assets in portfolio" spreadsheet (Fundos 555, total).
#
# download_path: destination of the downloaded file, forwarded to
#   download_cvm_file().
# verbose: forwarded to download_cvm_file() and read_cvm_file().
# Returns the sheet without its first column, with the first remaining column
# renamed "data" and converted to a Date on day 1 of the month, and every
# other column converted by parse_num_ods_col().
emissores_ativos_em_carteira <- function(download_path = NULL, verbose = TRUE){
  csv_url <- "http://www.cvm.gov.br/menu/acesso_informacao/serieshistoricas/serieshistoricas/anexos/Ativos_em_Carteira__Fundos_555__Total.ods"
  file <- download_cvm_file(csv_url, download_path, verbose)
  ## read downloaded file
  x <- read_cvm_file(file, verbose = verbose)
  # clean names
  x <- janitor::clean_names(x)
  # remove first col
  x[1] <- NULL

  # for the date column, replace Portuguese month abbreviations by numbers
  colnames(x)[1] <- "data"
  meses <- c(jan = "01", fev = "02", mar = "03", abr = "04",
             mai = "05", jun = "06", jul = "07", ago = "08",
             set = "09", out = "10", nov = "11", dez = "12")
  for (abrev in names(meses)){
    x$data <- gsub(abrev, meses[[abrev]], x$data)
  }

  # convert to Date type: characters 1-2 are the month, 4-5 the two-digit year
  # NOTE(review): the century is hard-coded as "20" -- confirm the sheet has
  # no dates before 2000
  x$data <- as.Date(ISOdate(year = paste0("20", substr(x$data, 4, 5)),
                            month = substr(x$data, 1, 2),
                            day = 1))

  # numeric columns; seq_len(...)[-1] is empty when only the date column
  # exists, whereas 2:ncol(x) would count down to c(2, 1)
  for (i in seq_len(ncol(x))[-1]){
    x[[i]] <- parse_num_ods_col(x[[i]])
  }

  x
}

### caracteristicas dos fundos

# Download the "fund characteristics" spreadsheet (Fundos 555, total) and
# express selected columns as a share of the monthly total.
#
# download_path: destination of the downloaded file, forwarded to
#   download_cvm_file().
# verbose: forwarded to download_cvm_file() and read_cvm_file().
# Returns a data.frame with a Date column `data` (day 1 of each month), the
# total `total_fundos` (sum of sheet columns 3 and 4) and the ratio of sheet
# columns 4, 9, 13, 17 and 25-28 to that total.
emissores_caracteristicas_fundos <- function(download_path = NULL, verbose = TRUE){
  csv_url <- "http://www.cvm.gov.br/menu/acesso_informacao/serieshistoricas/serieshistoricas/anexos/Caracteristicas_dos_Fundos__Planilha_Agregada__Fundos_555__Total.ods"
  file <- download_cvm_file(csv_url, download_path, verbose)
  ## read downloaded file
  x <- read_cvm_file(file, verbose = verbose)
  # clean names
  x <- janitor::clean_names(x)

  # NOTE(review): columns 3 and 4 are presumably the "FIC: no/yes" counts, so
  # their sum is the number of funds in the month -- confirm against the sheet
  total_fundos_mes <- unname(rowSums(x[, 3:4]))

  # Build the data.frame directly. cbind() on plain vectors creates a matrix,
  # so a character date column would turn every ratio into text.
  x_final <- data.frame(
    # date vector
    data = x[[2]],
    total_fundos = total_fundos_mes,
    # share of FICs
    fic_sim = x[[4]] / total_fundos_mes,
    # condominium type
    condominio_aberto = x[[9]] / total_fundos_mes,
    # exclusive, yes or no
    exclusivo_sim = x[[13]] / total_fundos_mes,
    # qualified, yes or no
    qualificado_sim = x[[17]] / total_fundos_mes,
    # fund class
    acoes = x[[25]] / total_fundos_mes,
    renda_fixa = x[[26]] / total_fundos_mes,
    multimercado = x[[27]] / total_fundos_mes,
    cambial = x[[28]] / total_fundos_mes,
    stringsAsFactors = FALSE
  )

  # convert date column: characters 1-4 are the year, 5-6 the month
  x_final$data <- as.Date(ISOdate(year = as.numeric(substr(x_final$data, 1, 4)),
                                  month = as.numeric(substr(x_final$data, 5, 6)),
                                  day  = 1))
  x_final

}


## helpers.R
# Convert Brazilian-formatted amounts read from the spreadsheets to numbers.
#
# x: vector of values such as "R$ 1.234,56" or "(1.234,56)".
# Returns a numeric vector with one element per element of x; values that
# still are not valid numbers after cleaning become NA (as.numeric() warns).
parse_num_ods_col <- function(x){
  # drop the currency marker: every "R" and "$" character
  parsed <- gsub('[R$]', '', x, fixed = FALSE)
  # drop thousands separators, then turn the decimal comma into a point
  parsed <- gsub('.', '', parsed, fixed = TRUE)
  parsed <- gsub(',', '.', parsed, fixed = TRUE)

  # in case of parenthesis, replace by a negative sign; surrounding blanks are
  # trimmed first so that "R$ (2,50)" gives "-2.50" and not "- 2.50" (NA)
  is_negative <- grepl("[()]", parsed)
  if (any(is_negative)){
    parsed[is_negative] <- paste0("-", trimws(gsub("[()]", "", parsed[is_negative])))
  }

  as.numeric(parsed)
}


# Download a file with the external "curl" program and return where it was
# saved.
#
# csv_url: URL of the file to download.
# download_path: destination path; a fresh tempfile() is used when NULL.
# verbose: when TRUE, emit a progress message before downloading.
# Returns the path of the downloaded file. Stops with an error when curl
# reports a failure or no file was written.
download_cvm_file <- function(csv_url, download_path = NULL, verbose = TRUE){

  if (is.null(download_path)){
    download_path <- tempfile()
  }

  if (verbose){
    message("Downloading file")
  }

  # With method = "curl" a failed transfer only produces a warning and a
  # non-zero return value, so the status has to be checked explicitly.
  status <- download.file(url = csv_url, destfile = download_path, method = "curl")
  if (status != 0 || !file.exists(download_path)){
    stop("Download of ", csv_url, " failed (exit status ", status, ")",
         call. = FALSE)
  }

  download_path
}

# Read one sheet of a downloaded .ods file with readODS::read_ods().
#
# file: path of the .ods file.
# sheet: sheet to read (default 1).
# skip: number of leading rows to skip (default 5).
# verbose: when TRUE, emit a progress message before reading.
# Returns whatever read_ods() returns; messages raised while reading are
# suppressed.
read_cvm_file <- function(file, sheet = 1, skip = 5, verbose = TRUE){
  if (verbose) message("Reading file")

  planilha <- suppressMessages(
    readODS::read_ods(path = file, sheet = sheet, skip = skip)
  )
  planilha
}
	library(httr)
	library(magrittr)
	library(xml2)
	library(rvest)

	# Directory listing with the daily-report ("informe diario") CSV files and the
	# local path where the selected file is stored.
	url_raw <- "http://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/"
	output_file <- "fundos/api/informe_diario.csv"

	# Collect the target of every hyperlink in the listing. Fail early on an HTTP
	# error instead of scraping an error page.
	csv_links <- httr::GET(url_raw) %>%
	  httr::stop_for_status() %>%
	  httr::content("text") %>%
	  xml2::read_html() %>%
	  rvest::html_nodes("a") %>%
	  rvest::html_attr("href")

	# Keep only links that end in ".csv". The former pattern "*.csv" was a shell
	# glob rather than a regular expression: its unescaped "." matched any
	# character and it was not anchored to the end of the name.
	csv_links <- csv_links[grepl(pattern = "\\.csv$", x = csv_links)]
	if (length(csv_links) == 0) {
	  # without this guard paste0() below would silently yield the bare listing URL
	  stop("No CSV links found at ", url_raw, call. = FALSE)
	}

	# The last link of the listing is the file that gets downloaded.
	ultimo_csv <- csv_links[length(csv_links)]

	download_link <- paste0(url_raw, ultimo_csv)

	download.file(download_link, destfile = output_file,
	              method = "wget")

	### api fundos
	library(plumber)

	# Import the daily-report file into the global data frame `df` that the API
	# endpoint reads from.
	ultimo_inf_diario <- "fundos/api/informe_diario.csv"

	df <- read.csv2(
	  file = ultimo_inf_diario,
	  stringsAsFactors = FALSE
	)

	# Return the quota value of the last row of the global `df` whose CNPJ_FUNDO
	# equals `cnpj_busca`.
	#
	# cnpj_busca: value compared against df$CNPJ_FUNDO.
	# Returns the last matching VL_QUOTA converted to numeric; the result has
	# length zero when no row matches.
	#* @post /valor_quota
	retornar_quota <- function(cnpj_busca){
	  linhas_do_fundo <- df$CNPJ_FUNDO == cnpj_busca
	  quotas <- as.numeric(df$VL_QUOTA[linhas_do_fundo])
	  # only the last match, in file order, is reported
	  quotas[length(quotas)]
	}

	#"http://dados.cvm.gov.br/dados/FI/CAD/DADOS/inf_cadastral_fi_20180906.csv"


	# Download the fund registry ("cadastro") file listed on the CVM open-data
	# directory and read it.
	#
	# download_path: destination of the downloaded file, forwarded to
	#   download_cvm_file() (NULL is forwarded unchanged).
	# verbose: forwarded to download_cvm_file().
	# Returns the result of readr::read_delim() on the downloaded file
	# (";"-delimited, ISO-8859-1 encoded).
	download_cadastro_fundos <- function(download_path = NULL, verbose = TRUE){
	  # do web scraping to get the last filename
	  base_url <- "http://dados.cvm.gov.br/dados/FI/CAD/DADOS/"
	  listing <- rvest::html_table(xml2::read_html(base_url))[[1]]
	  # File name sits in the second column of the second-to-last row.
	  # `[[` returns a plain vector whether html_table() gives a data.frame or a
	  # tibble, so paste0() below always receives a string.
	  filename <- listing[[2]][nrow(listing) - 1]

	  url_cadastro <- paste0(base_url, filename)
	  # save downloaded file on a temporary file
	  file <- download_cvm_file(csv_url = url_cadastro, download_path, verbose)
	  # read the downloaded file; locale() must be namespace-qualified because
	  # readr is never attached with library()
	  readr::read_delim(file,
	                    delim = ";",
	                    locale = readr::locale(encoding = "ISO-8859-1"))
	}
	#### Emissores
	# Download and tidy the aggregated daily-report spreadsheet (Fundos 555, total).
	#
	# download_path: destination of the downloaded file, forwarded to
	#   download_cvm_file().
	# verbose: forwarded to download_cvm_file() and read_cvm_file().
	# Returns the sheet without its first two columns, with cleaned column names,
	# the first remaining column renamed "data" and parsed as day-month-year, and
	# every other column converted by parse_num_ods_col().
	emissores_informe_diario <- function(download_path = NULL, verbose = TRUE){
	  # save downloaded file on a temporary file
	  csv_url <- "http://www.cvm.gov.br/menu/acesso_informacao/serieshistoricas/serieshistoricas/anexos/Informe_diario__Planilha_Agregada__Fundos_555__Total.ods"
	  file <- download_cvm_file(csv_url, download_path, verbose)

	  ## read downloaded file
	  x <- read_cvm_file(file, verbose = verbose)
	  # remove invalid columns
	  x[1:2] <- NULL
	  # clean colnames
	  x <- janitor::clean_names(x)
	  # rename date column
	  colnames(x)[1] <- "data"

	  ## convert column types
	  # date column; `[[` yields the column vector for data.frames and tibbles alike
	  x[[1]] <- lubridate::dmy(x[[1]])

	  # numeric columns; seq_len(...)[-1] is empty when only the date column
	  # exists, whereas 2:ncol(x) would count down to c(2, 1)
	  for (i in seq_len(ncol(x))[-1]){
	    x[[i]] <- parse_num_ods_col(x[[i]])
	  }

	  x
	}

	# Download and tidy the "assets in portfolio" spreadsheet (Fundos 555, total).
	#
	# download_path: destination of the downloaded file, forwarded to
	#   download_cvm_file().
	# verbose: forwarded to download_cvm_file() and read_cvm_file().
	# Returns the sheet without its first column, with the first remaining column
	# renamed "data" and converted to a Date on day 1 of the month, and every
	# other column converted by parse_num_ods_col().
	emissores_ativos_em_carteira <- function(download_path = NULL, verbose = TRUE){
	  csv_url <- "http://www.cvm.gov.br/menu/acesso_informacao/serieshistoricas/serieshistoricas/anexos/Ativos_em_Carteira__Fundos_555__Total.ods"
	  file <- download_cvm_file(csv_url, download_path, verbose)
	  ## read downloaded file
	  x <- read_cvm_file(file, verbose = verbose)
	  # clean names
	  x <- janitor::clean_names(x)
	  # remove first col
	  x[1] <- NULL

	  # for the date column, replace Portuguese month abbreviations by numbers
	  colnames(x)[1] <- "data"
	  meses <- c(jan = "01", fev = "02", mar = "03", abr = "04",
	             mai = "05", jun = "06", jul = "07", ago = "08",
	             set = "09", out = "10", nov = "11", dez = "12")
	  for (abrev in names(meses)){
	    x$data <- gsub(abrev, meses[[abrev]], x$data)
	  }

	  # convert to Date type: characters 1-2 are the month, 4-5 the two-digit year
	  # NOTE(review): the century is hard-coded as "20" -- confirm the sheet has
	  # no dates before 2000
	  x$data <- as.Date(ISOdate(year = paste0("20", substr(x$data, 4, 5)),
	                            month = substr(x$data, 1, 2),
	                            day = 1))

	  # numeric columns; seq_len(...)[-1] is empty when only the date column
	  # exists, whereas 2:ncol(x) would count down to c(2, 1)
	  for (i in seq_len(ncol(x))[-1]){
	    x[[i]] <- parse_num_ods_col(x[[i]])
	  }

	  x
	}

	### caracteristicas dos fundos

	# Download the "fund characteristics" spreadsheet (Fundos 555, total) and
	# express selected columns as a share of the monthly total.
	#
	# download_path: destination of the downloaded file, forwarded to
	#   download_cvm_file().
	# verbose: forwarded to download_cvm_file() and read_cvm_file().
	# Returns a data.frame with a Date column `data` (day 1 of each month), the
	# total `total_fundos` (sum of sheet columns 3 and 4) and the ratio of sheet
	# columns 4, 9, 13, 17 and 25-28 to that total.
	emissores_caracteristicas_fundos <- function(download_path = NULL, verbose = TRUE){
	  csv_url <- "http://www.cvm.gov.br/menu/acesso_informacao/serieshistoricas/serieshistoricas/anexos/Caracteristicas_dos_Fundos__Planilha_Agregada__Fundos_555__Total.ods"
	  file <- download_cvm_file(csv_url, download_path, verbose)
	  ## read downloaded file
	  x <- read_cvm_file(file, verbose = verbose)
	  # clean names
	  x <- janitor::clean_names(x)

	  # NOTE(review): columns 3 and 4 are presumably the "FIC: no/yes" counts, so
	  # their sum is the number of funds in the month -- confirm against the sheet
	  total_fundos_mes <- unname(rowSums(x[, 3:4]))

	  # Build the data.frame directly. cbind() on plain vectors creates a matrix,
	  # so a character date column would turn every ratio into text.
	  x_final <- data.frame(
	    # date vector
	    data = x[[2]],
	    total_fundos = total_fundos_mes,
	    # share of FICs
	    fic_sim = x[[4]] / total_fundos_mes,
	    # condominium type
	    condominio_aberto = x[[9]] / total_fundos_mes,
	    # exclusive, yes or no
	    exclusivo_sim = x[[13]] / total_fundos_mes,
	    # qualified, yes or no
	    qualificado_sim = x[[17]] / total_fundos_mes,
	    # fund class
	    acoes = x[[25]] / total_fundos_mes,
	    renda_fixa = x[[26]] / total_fundos_mes,
	    multimercado = x[[27]] / total_fundos_mes,
	    cambial = x[[28]] / total_fundos_mes,
	    stringsAsFactors = FALSE
	  )

	  # convert date column: characters 1-4 are the year, 5-6 the month
	  x_final$data <- as.Date(ISOdate(year = as.numeric(substr(x_final$data, 1, 4)),
	                                  month = as.numeric(substr(x_final$data, 5, 6)),
	                                  day = 1))
	  x_final

	}
	# Convert Brazilian-formatted amounts read from the spreadsheets to numbers.
	#
	# x: vector of values such as "R$ 1.234,56" or "(1.234,56)".
	# Returns a numeric vector with one element per element of x; values that
	# still are not valid numbers after cleaning become NA (as.numeric() warns).
	parse_num_ods_col <- function(x){
	  # drop the currency marker: every "R" and "$" character
	  parsed <- gsub('[R$]', '', x, fixed = FALSE)
	  # drop thousands separators, then turn the decimal comma into a point
	  parsed <- gsub('.', '', parsed, fixed = TRUE)
	  parsed <- gsub(',', '.', parsed, fixed = TRUE)

	  # in case of parenthesis, replace by a negative sign; surrounding blanks are
	  # trimmed first so that "R$ (2,50)" gives "-2.50" and not "- 2.50" (NA)
	  is_negative <- grepl("[()]", parsed)
	  if (any(is_negative)){
	    parsed[is_negative] <- paste0("-", trimws(gsub("[()]", "", parsed[is_negative])))
	  }

	  as.numeric(parsed)
	}


	# Download a file with the external "curl" program and return where it was
	# saved.
	#
	# csv_url: URL of the file to download.
	# download_path: destination path; a fresh tempfile() is used when NULL.
	# verbose: when TRUE, emit a progress message before downloading.
	# Returns the path of the downloaded file. Stops with an error when curl
	# reports a failure or no file was written.
	download_cvm_file <- function(csv_url, download_path = NULL, verbose = TRUE){

	  if (is.null(download_path)){
	    download_path <- tempfile()
	  }

	  if (verbose){
	    message("Downloading file")
	  }

	  # With method = "curl" a failed transfer only produces a warning and a
	  # non-zero return value, so the status has to be checked explicitly.
	  status <- download.file(url = csv_url, destfile = download_path, method = "curl")
	  if (status != 0 || !file.exists(download_path)){
	    stop("Download of ", csv_url, " failed (exit status ", status, ")",
	         call. = FALSE)
	  }

	  download_path
	}

	# Read one sheet of a downloaded .ods file with readODS::read_ods().
	#
	# file: path of the .ods file.
	# sheet: sheet to read (default 1).
	# skip: number of leading rows to skip (default 5).
	# verbose: when TRUE, emit a progress message before reading.
	# Returns whatever read_ods() returns; messages raised while reading are
	# suppressed.
	read_cvm_file <- function(file, sheet = 1, skip = 5, verbose = TRUE){
	  if (verbose) message("Reading file")

	  planilha <- suppressMessages(
	    readODS::read_ods(path = file, sheet = sheet, skip = skip)
	  )
	  planilha
	}