Skip to content

Instantly share code, notes, and snippets.

@sillasgonzaga
Last active June 15, 2023 13:17
Show Gist options
  • Star 22 You must be signed in to star a gist
  • Fork 8 You must be signed in to fork a gist
  • Save sillasgonzaga/df996395a2dfb92fbe4d8e34cb06ee61 to your computer and use it in GitHub Desktop.
Save sillasgonzaga/df996395a2dfb92fbe4d8e34cb06ee61 to your computer and use it in GitHub Desktop.
API para baixar quotas de fundos
library(httr)
library(magrittr)
library(xml2)
library(rvest)
# Scrape the CVM daily-report directory listing and download the most recent
# CSV file to `output_file`.
url_raw <- "http://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/"
output_file <- "fundos/api/informe_diario.csv"

# Collect the href of every anchor in the directory listing; abort early when
# the server answers with an HTTP error instead of parsing an error page.
csv_links <- httr::GET(url_raw) %>%
  httr::stop_for_status() %>%
  httr::content("text") %>%
  xml2::read_html() %>%
  rvest::html_nodes("a") %>%
  rvest::html_attr("href")

# Keep only links that end in ".csv". The pattern is a regular expression, so
# the dot is escaped and the match is anchored to the end of the link.
csv_links <- csv_links[grepl(pattern = "\\.csv$", x = csv_links)]
if (length(csv_links) == 0) {
  stop("No CSV link found at ", url_raw, call. = FALSE)
}

# The last link of the listing is taken as the most recent file.
ultimo_csv <- csv_links[length(csv_links)]
download_link <- paste0(url_raw, ultimo_csv)

# download.file() cannot create missing directories, so make sure the
# destination folder exists before downloading.
dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)
download_status <- download.file(download_link, destfile = output_file,
                                 method = "wget")
# With the "wget" method a failure is only reported through the return code.
if (!identical(as.integer(download_status), 0L)) {
  stop("Download failed (status ", download_status, "): ", download_link,
       call. = FALSE)
}
### api fundos
library(plumber)
# Load the daily report file into the global `df` used by the API endpoint.
# read.csv2() reads ';'-separated files and uses ',' as the decimal mark.
ultimo_inf_diario <- "fundos/api/informe_diario.csv"
df <- utils::read.csv2(
  file = ultimo_inf_diario,
  stringsAsFactors = FALSE
)
#* @post /valor_quota
retornar_quota <- function(cnpj_busca) {
  # Return the quota value (VL_QUOTA) found in the last row of the global
  # `df` whose CNPJ_FUNDO equals `cnpj_busca`.
  #
  # cnpj_busca: fund identifier compared against df$CNPJ_FUNDO.
  # Returns a numeric of length one, or an empty numeric when nothing matches.
  linhas_do_fundo <- df$CNPJ_FUNDO == cnpj_busca
  valores <- as.numeric(df$VL_QUOTA[linhas_do_fundo])
  # keep only the last matching value
  utils::tail(valores, n = 1L)
}
#"http://dados.cvm.gov.br/dados/FI/CAD/DADOS/inf_cadastral_fi_20180906.csv"
# Download and read the fund registry file listed on the CVM directory page.
#
# download_path: where to save the downloaded file; NULL is forwarded to
#   download_cvm_file(), which then picks a temporary file.
# verbose: forwarded to download_cvm_file().
# Returns the file contents as read by readr::read_delim() (';'-delimited,
# ISO-8859-1 encoded).
download_cadastro_fundos <- function(download_path = NULL, verbose = TRUE) {
  # do web scraping to get the last filename
  base_url <- "http://dados.cvm.gov.br/dados/FI/CAD/DADOS/"
  listing <- rvest::html_table(xml2::read_html(base_url))[[1]]
  if (ncol(listing) < 2 || nrow(listing) < 2) {
    stop("Unexpected directory listing at ", base_url, call. = FALSE)
  }
  # The file name is read from the 2nd column of the second-to-last row.
  # NOTE(review): presumably the last row of the listing is not a file entry;
  # confirm against the live page.
  # `[[` extracts a plain vector whether html_table() returns a data.frame
  # or a tibble.
  filename <- as.character(listing[[2]][nrow(listing) - 1])
  url_cadastro <- paste0(base_url, filename)
  # save downloaded file (on a temporary file when download_path is NULL)
  file <- download_cvm_file(csv_url = url_cadastro, download_path, verbose)
  # read the downloaded file; locale() is namespaced because readr is not
  # attached by this script
  readr::read_delim(
    file,
    delim = ";",
    locale = readr::locale(encoding = "ISO-8859-1")
  )
}
#### Emissores
# Download the aggregated daily-report spreadsheet (.ods) from CVM and return
# it as a cleaned table.
#
# download_path: where to save the downloaded file (NULL = temporary file,
#   as handled by download_cvm_file()).
# verbose: forwarded to download_cvm_file() and read_cvm_file().
# Returns the table with the first two columns dropped, cleaned column names,
# a first column named "data" parsed with lubridate::dmy(), and every other
# column converted by parse_num_ods_col().
emissores_informe_diario <- function(download_path = NULL, verbose = TRUE) {
  # save downloaded file on a temporary file
  csv_url <- "http://www.cvm.gov.br/menu/acesso_informacao/serieshistoricas/serieshistoricas/anexos/Informe_diario__Planilha_Agregada__Fundos_555__Total.ods"
  file <- download_cvm_file(csv_url, download_path, verbose)
  ## read downloaded file
  x <- read_cvm_file(file, verbose = verbose)
  # remove invalid columns (the first two)
  x <- x[-(1:2)]
  # clean colnames
  x <- janitor::clean_names(x)
  # rename date column
  colnames(x)[1] <- "data"
  ## convert column types
  # `[[` is used so each column is handled as a plain vector whether the
  # reader returned a data.frame or a tibble.
  x[[1]] <- lubridate::dmy(x[[1]])
  # seq_len(...)[-1] is empty when there is only the date column, unlike
  # 2:ncol(x), which would count backwards.
  for (i in seq_len(ncol(x))[-1]) {
    x[[i]] <- parse_num_ods_col(x[[i]])
  }
  x
}
# Download the "assets in portfolio" spreadsheet (.ods) from CVM and return it
# as a cleaned table.
#
# download_path: where to save the downloaded file (NULL = temporary file,
#   as handled by download_cvm_file()).
# verbose: forwarded to download_cvm_file() and read_cvm_file().
# Returns the table without its first column, with a Date column named "data"
# (first day of each month) and every other column converted by
# parse_num_ods_col().
emissores_ativos_em_carteira <- function(download_path = NULL, verbose = TRUE) {
  csv_url <- "http://www.cvm.gov.br/menu/acesso_informacao/serieshistoricas/serieshistoricas/anexos/Ativos_em_Carteira__Fundos_555__Total.ods"
  file <- download_cvm_file(csv_url, download_path, verbose)
  ## read downloaded file
  x <- read_cvm_file(file, verbose = verbose)
  # clean names
  x <- janitor::clean_names(x)
  # remove first col
  x <- x[-1]
  colnames(x)[1] <- "data"

  # for the date column, replace Portuguese month abbreviations by the
  # two-digit month number
  meses <- c(
    jan = "01", fev = "02", mar = "03", abr = "04", mai = "05", jun = "06",
    jul = "07", ago = "08", set = "09", out = "10", nov = "11", dez = "12"
  )
  datas <- as.character(x[["data"]])
  for (abrev in names(meses)) {
    datas <- gsub(abrev, meses[[abrev]], datas, fixed = TRUE)
  }

  # convert to Date type: characters 1-2 hold the month and 4-5 the two-digit
  # year. NOTE(review): the "20" prefix assumes every year is 20xx - confirm.
  x[["data"]] <- as.Date(ISOdate(
    year = paste0("20", substr(datas, 4, 5)),
    month = substr(datas, 1, 2),
    day = 1
  ))

  # numeric columns; `[[` keeps each column a plain vector for both
  # data.frame and tibble input, and seq_len(...)[-1] is empty when only the
  # date column exists.
  for (i in seq_len(ncol(x))[-1]) {
    x[[i]] <- parse_num_ods_col(x[[i]])
  }
  x
}
### fund characteristics
# Download the aggregated "fund characteristics" spreadsheet (.ods) from CVM
# and return, per row, the total number of funds plus selected columns
# expressed as a share of that total.
#
# download_path: where to save the downloaded file (NULL = temporary file,
#   as handled by download_cvm_file()).
# verbose: forwarded to download_cvm_file() and read_cvm_file().
# Returns a data.frame with a Date column `data` (first day of the month),
# `total_fundos` (sum of columns 3 and 4) and one ratio column per selected
# source column.
emissores_caracteristicas_fundos <- function(download_path = NULL, verbose = TRUE) {
  csv_url <- "http://www.cvm.gov.br/menu/acesso_informacao/serieshistoricas/serieshistoricas/anexos/Caracteristicas_dos_Fundos__Planilha_Agregada__Fundos_555__Total.ods"
  file <- download_cvm_file(csv_url, download_path, verbose)
  ## read downloaded file
  x <- read_cvm_file(file, verbose = verbose)
  # clean names
  x <- janitor::clean_names(x)
  total_fundos_mes <- unname(rowSums(x[, 3:4]))

  # share of the monthly total held by source column `j`
  proporcao <- function(j) {
    x[[j]] / total_fundos_mes
  }

  # data.frame() keeps every column's own type; cbind() on vectors would
  # build a matrix and coerce all columns to a single type.
  x_final <- data.frame(
    # date vector
    data = x[[2]],
    total_fundos = total_fundos_mes,
    # proportion of FICs
    fic_sim = proporcao(4),
    # condominium type (open)
    condominio_aberto = proporcao(9),
    # exclusive, yes or no
    exclusivo_sim = proporcao(13),
    # qualified, yes or no
    qualificado_sim = proporcao(17),
    # fund classes
    acoes = proporcao(25),
    renda_fixa = proporcao(26),
    multimercado = proporcao(27),
    cambial = proporcao(28),
    stringsAsFactors = FALSE
  )

  # convert date column: characters 1-4 are read as the year and 5-6 as the
  # month
  x_final$data <- as.Date(ISOdate(
    year = as.numeric(substr(x_final$data, 1, 4)),
    month = as.numeric(substr(x_final$data, 5, 6)),
    day = 1
  ))
  x_final
}
# Convert a column of Brazilian-formatted currency strings to numeric.
#
# x: vector of values such as "R$ 1.234,56" or "(1.234,56)". Values that are
#   already numeric are returned unchanged (as double), because running the
#   text clean-up on them would strip their decimal point.
# Returns a numeric vector the same length as `x`; values wrapped in
# parentheses become negative, and text that cannot be parsed becomes NA
# (with the usual as.numeric() coercion warning).
parse_num_ods_col <- function(x) {
  if (is.numeric(x)) {
    return(as.numeric(x))
  }
  # drop the currency symbol and any whitespace, so that "R$ (1,5)" does not
  # end up as "- 1.5", which as.numeric() cannot parse
  parsed <- gsub("[R$[:space:]]", "", x)
  # remove thousands separators, then turn the decimal comma into a dot
  parsed <- gsub(".", "", parsed, fixed = TRUE)
  parsed <- gsub(",", ".", parsed, fixed = TRUE)
  # in case of parenthesis, replace by a negative sign
  is_negative <- grepl("[()]", parsed)
  parsed <- gsub("[()]", "", parsed)
  parsed[is_negative] <- paste0("-", parsed[is_negative])
  as.numeric(parsed)
}
# Download a file from `csv_url` and return the path it was saved to.
#
# csv_url: URL of the file to download.
# download_path: destination path; when NULL a temporary file is used.
# verbose: when TRUE, emit a progress message before downloading.
# Returns `download_path` (the generated temporary path when it was NULL).
# Stops with an error when the download reports a non-zero status.
download_cvm_file <- function(csv_url, download_path = NULL, verbose = TRUE) {
  if (is.null(download_path)) {
    download_path <- tempfile()
  }
  if (verbose) {
    message("Downloading file")
  }
  status <- download.file(
    url = csv_url,
    destfile = download_path,
    method = "curl"
  )
  # With the external "curl" method a failure is only reported through the
  # return code, so check it instead of returning a path to a missing or
  # partial file.
  if (!identical(as.integer(status), 0L)) {
    stop("Download failed (status ", status, "): ", csv_url, call. = FALSE)
  }
  download_path
}
# Read one sheet of a downloaded .ods file with readODS::read_ods().
#
# file: path to the .ods file.
# sheet: sheet to read (passed to read_ods).
# skip: number of leading rows to skip (passed to read_ods).
# verbose: when TRUE, emit a progress message before reading.
# Returns whatever read_ods() returns; messages raised while reading are
# suppressed.
read_cvm_file <- function(file, sheet = 1, skip = 5, verbose = TRUE) {
  if (verbose) message("Reading file")
  planilha <- suppressMessages(
    readODS::read_ods(path = file, sheet = sheet, skip = skip)
  )
  planilha
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment