# R: Como ler arquivos .dbc? Como usar o pacote read.dbc?
# Gist: beatrizmilz/257f7ff7591607a0b9a4cf430857fe32
# Criado em: 30 de janeiro de 2024, 13:00
# Question from Nathan Oliveira: how do I read .dbc files?
# There is a package called read.dbc,
# which is used by the microdatasus package.
# Reference: https://github.com/danicat/read.dbc
# When this script was written, read.dbc was NOT available on CRAN,
# so it is installed from GitHub, which requires the remotes package.
# remotes provides functions to install packages straight from GitHub.
# Each package is installed only when it is missing, so re-running the
# script does not reinstall it (or hit the network) every time.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
# Install the read.dbc package.
# Note: when prompted, accept installing the required
# packages and their updates :)
if (!requireNamespace("read.dbc", quietly = TRUE)) {
  remotes::install_github("danicat/read.dbc")
}
# You sent me a folder with the files you want to read.
# Change the path below to wherever your folder is located.
pasta_com_arquivos_dbc <- "Downloads/TB_TABWIN_2000 a 2023/"
# Check whether this folder contains .dbc files
list.files(pasta_com_arquivos_dbc)
# [1] "TUBEBR01.dbc" "TUBEBR02.dbc" "TUBEBR03.dbc"
# [4] "TUBEBR04.dbc" "TUBEBR05.dbc" "TUBEBR06.dbc"
# [7] "TUBEBR07.dbc" "TUBEBR08.dbc" "TUBEBR09.dbc"
# [10] "TUBEBR10.dbc" "TUBEBR11.dbc" "TUBEBR12.dbc"
# [13] "TUBEBR13.dbc" "TUBEBR14.dbc" "TUBEBR15.dbc"
# Build a vector with the full path to each .dbc file.
# `pattern` is a regular expression: the dot is escaped and the match is
# anchored at the end of the name, so only files ending in ".dbc" are kept
# (an unescaped ".dbc" would also match names such as "notes_dbc.txt").
arquivos_dbc <- list.files(
  pasta_com_arquivos_dbc,
  pattern = "\\.dbc$",
  full.names = TRUE
)
arquivos_dbc
# [1] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR01.dbc"
# [2] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR02.dbc"
# [3] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR03.dbc"
# [4] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR04.dbc"
# [5] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR05.dbc"
# [6] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR06.dbc"
# [7] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR07.dbc"
# [8] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR08.dbc"
# [9] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR09.dbc"
# [10] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR10.dbc"
# [11] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR11.dbc"
# [12] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR12.dbc"
# [13] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR13.dbc"
# [14] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR14.dbc"
# [15] "Downloads/TB_TABWIN_2000 a 2023//TUBEBR15.dbc"
# Read every file with read.dbc::read.dbc, iterating over the paths
# with purrr.
# This can take a while, so be patient! If your computer does not have
# much RAM, this approach may not work.
# If it errors / RStudio stops responding / etc., an alternative is to
# open, "clean" and save one file at a time.
dbc_lista <- arquivos_dbc |>
  purrr::map(\(caminho) read.dbc::read.dbc(caminho))
# purrr::map returns a list: row-bind its elements into one table and
# convert the result to a tibble.
tuberculose_tibble <- tibble::as_tibble(purrr::list_rbind(dbc_lista))
# Inspect the contents
tuberculose_tibble |>
  dplyr::glimpse()
# Rows: 1,317,912 | |
# Columns: 97 | |
# $ TP_NOT <fct> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,… | |
# $ ID_AGRAVO <fct> A169, A169, A169, A169, A169, A169, A169,… | |
# $ DT_NOTIFIC <date> 2001-01-04, 2001-02-20, 2001-01-12, 2001… | |
# $ NU_ANO <fct> 2001, 2001, 2001, 2001, 2001, 2001, 2001,… | |
# $ SG_UF_NOT <fct> 41, 41, 26, 15, 13, 13, 52, 52, 33, 33, 3… | |
# $ ID_MUNICIP <fct> 410420, 410690, 260570, 150140, 130120, 1… | |
# $ ID_REGIONA <fct> 1356, 1356, 1506, 1484, NA, NA, 1784, 178… | |
# $ DT_DIAG <date> 2001-01-04, 2001-02-20, 2001-01-12, 2001… | |
# $ ANO_NASC <fct> 1961, 1974, 1960, 1975, 1975, 1981, 1974,… | |
# $ NU_IDADE_N <int> 4039, 4026, 4040, 4025, 4025, 4019, 4026,… | |
# $ CS_SEXO <fct> F, F, M, M, F, F, F, M, M, M, M, M, M, F,… | |
# $ CS_GESTANT <fct> 9, 9, 6, 6, 9, 9, 9, 6, 6, 6, 6, 6, 6, 9,… | |
# $ CS_RACA <fct> NA, NA, NA, 4, 4, NA, NA, NA, NA, NA, NA,… | |
# $ CS_ESCOL_N <fct> 3, 3, 9, 9, 9, 9, 9, 0, 0, 10, 10, 3, 3, … | |
# $ SG_UF <fct> 41, 41, 26, 15, 13, 13, 52, 52, 33, 33, 3… | |
# $ ID_MN_RESI <fct> 410420, 410420, 260570, 150140, 130120, 1… | |
# $ ID_RG_RESI <fct> 1356, 1356, 1506, 1484, NA, NA, 1784, 178… | |
# $ ID_PAIS <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,… | |
# $ NDUPLIC_N <fct> 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… | |
# $ IN_VINCULA <fct> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… | |
# $ DT_DIGITA <date> 1899-12-30, 1899-12-30, 1899-12-30, 1899… | |
# $ DT_TRANSUS <date> 1899-12-30, 1899-12-30, 1899-12-30, 1899… | |
# $ DT_TRANSDM <date> 1899-12-30, 1899-12-30, 1899-12-30, 1899… | |
# $ DT_TRANSSM <date> 2007-12-10, 2007-12-10, 1899-12-30, 2008… | |
# $ DT_TRANSRM <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, … | |
# $ DT_TRANSRS <date> 1899-12-30, 1899-12-30, 2008-01-03, 1899… | |
# $ DT_TRANSSE <date> 2008-08-15, 2008-08-15, 1899-12-30, 1899… | |
# $ CS_FLXRET <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ FLXRECEBI <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ MIGRADO_W <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,… | |
# $ ID_OCUPA_N <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ TRATAMENTO <fct> 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,… | |
# $ INSTITUCIO <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ RAIOX_TORA <fct> 1, 1, 4, 1, 4, 4, 1, 1, 1, 1, 1, 1, 4, 1,… | |
# $ TESTE_TUBE <fct> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3,… | |
# $ FORMA <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,… | |
# $ EXTRAPU1_N <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ EXTRAPU2_N <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ EXTRAPUL_O <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ AGRAVAIDS <fct> NA, NA, NA, 9, 9, NA, NA, NA, NA, NA, NA,… | |
# $ AGRAVALCOO <fct> NA, NA, NA, 9, 9, NA, NA, NA, NA, NA, NA,… | |
# $ AGRAVDIABE <fct> NA, NA, NA, 9, 9, NA, NA, NA, NA, NA, NA,… | |
# $ AGRAVDOENC <fct> NA, NA, NA, 9, 9, NA, NA, NA, NA, NA, NA,… | |
# $ AGRAVOUTRA <fct> NA, NA, NA, 9, 9, NA, NA, NA, NA, NA, NA,… | |
# $ AGRAVOUTDE <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ BACILOSC_E <fct> 1, 2, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, 1,… | |
# $ BACILOS_E2 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ BACILOSC_O <fct> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,… | |
# $ CULTURA_ES <fct> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,… | |
# $ CULTURA_OU <fct> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, NA, 4… | |
# $ HIV <fct> 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4,… | |
# $ HISTOPATOL <fct> 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, NA, 5, NA, … | |
# $ DT_INIC_TR <date> 2001-01-04, 2001-02-20, 2001-01-12, 2001… | |
# $ RIFAMPICIN <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, 1… | |
# $ ISONIAZIDA <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,… | |
# $ ETAMBUTOL <fct> 2, NA, 2, 2, 2, 2, 2, 2, NA, NA, NA, NA, … | |
# $ ESTREPTOMI <fct> 2, NA, 2, 2, 2, 2, 2, 2, NA, NA, NA, NA, … | |
# $ PIRAZINAMI <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, 1… | |
# $ ETIONAMIDA <fct> 2, NA, 2, 2, 2, 2, 2, 2, NA, NA, NA, NA, … | |
# $ OUTRAS <fct> 2, NA, 2, 2, 2, NA, 2, 2, NA, NA, NA, NA,… | |
# $ OUTRAS_DES <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ TRAT_SUPER <fct> 9, 9, 9, 2, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9,… | |
# $ NU_CONTATO <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… | |
# $ DOENCA_TRA <fct> NA, NA, NA, 2, 2, NA, NA, NA, NA, NA, NA,… | |
# $ SG_UF_AT <fct> 41, 41, 26, 15, 13, 13, 52, 52, 33, 33, 3… | |
# $ ID_MUNIC_A <fct> 410420, 410690, 260570, 150140, 130120, 1… | |
# $ DT_NOTI_AT <date> 2001-01-04, 2001-02-20, 2001-01-12, 2001… | |
# $ SG_UF_2 <fct> 41, 41, 26, 15, 13, 13, 52, 52, 33, 33, 3… | |
# $ ID_MUNIC_2 <fct> 410420, 410420, 260570, 150140, 130120, 1… | |
# $ BACILOSC_1 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ BACILOSC_2 <fct> 2, NA, 2, NA, 3, NA, NA, NA, NA, 3, 3, 3,… | |
# $ BACILOSC_3 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ BACILOSC_4 <fct> 2, NA, 2, NA, 3, NA, NA, NA, NA, 3, 3, 3,… | |
# $ BACILOSC_5 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ BACILOSC_6 <fct> 2, NA, 2, NA, 3, NA, NA, NA, NA, 3, 3, 2,… | |
# $ TRATSUP_AT <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ DT_MUDANCA <date> NA, NA, NA, NA, NA, NA, 1899-12-30, 1899… | |
# $ NU_COMU_EX <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… | |
# $ SITUA_9_M <fct> 1, NA, 1, 1, 1, 1, 1, 5, 4, 1, 1, 1, 1, 1… | |
# $ SITUA_12_M <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ SITUA_ENCE <fct> 1, NA, 1, 1, 1, 1, 1, 5, 4, 1, 1, 1, 1, 1… | |
# $ DT_ENCERRA <date> 2001-07-03, 1899-12-30, 2001-09-13, 2001… | |
# $ TPUNINOT <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ POP_LIBER <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ POP_RUA <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ POP_SAUDE <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ POP_IMIG <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ BENEF_GOV <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ AGRAVDROGA <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ AGRAVTABAC <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ TEST_MOLEC <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ TEST_SENSI <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ ANT_RETRO <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ BAC_APOS_6 <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ TRANSF <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ UF_TRANSF <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# $ MUN_TRANSF <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… | |
# There were 15 warnings (use warnings() to see them) | |
# Cuidado! É uma base bem grande.
# O ideal é checar quais são as colunas de que você realmente precisa e selecioná-las.
# Depois, salve o arquivo final contendo apenas as colunas necessárias.
# tb_colunas_interesse <- tuberculose_tibble |>
#   dplyr::select(....)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment