Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jtrecenti/6606f2665ab27fa5deccb88dccfd963d to your computer and use it in GitHub Desktop.
Save jtrecenti/6606f2665ab27fa5deccb88dccfd963d to your computer and use it in GitHub Desktop.
varas_corporativo.R
# Endpoint for SOAP requests
endpoint_url <- "https://www.cnj.jus.br/corregedoria/justica_aberta/ws.php"
# SOAP envelope invoking the 'pesquisarServentiasJudiciais' operation.
# 'seq_orgao' and 'dat_fim' are sent empty and 'dat_inicio' is 01/01/2000;
# all three are marked optional in the envelope (presumably an empty value
# means "no filter" -- TODO confirm against the service WSDL).
xml_payload <- '<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:cor="https://www.cnj.jus.br/corregedoria/justica_aberta/ws.php">
<soapenv:Header/>
<soapenv:Body>
<cor:pesquisarServentiasJudiciais>
<!--Optional:-->
<cor:seq_orgao></cor:seq_orgao>
<!--Optional:-->
<cor:dat_inicio>01/01/2000</cor:dat_inicio>
<!--Optional:-->
<cor:dat_fim></cor:dat_fim>
</cor:pesquisarServentiasJudiciais>
</soapenv:Body>
</soapenv:Envelope>'
# HTTP headers: XML content type plus the SOAPAction of the operation
headers <- httr::add_headers(
  "Content-Type" = "text/xml",
  "SOAPAction" = "https://www.cnj.jus.br/corregedoria/justica_aberta/ws.php/pesquisarServentiasJudiciaisIn"
)
# Making the request; the response body is written to data-raw/ on disk
r <- httr::POST(
  endpoint_url,
  body = xml_payload,
  headers,
  httr::write_disk("data-raw/xml_varas_cnj.xml")
)
# Fail early on an HTTP error status: otherwise an error page or SOAP fault
# would be parsed downstream as if it were data, silently yielding no rows
httr::stop_for_status(r)
# Extract the text of the first node matching `xpath` below `node`.
#
# Arguments:
#   node  - an xml2 node to search in.
#   xpath - XPath expression, evaluated relative to `node`.
#
# Returns the node text as a length-one character vector, or NA_character_
# when no node matches.
extract_text_or_na <- function(node, xpath) {
  found_node <- xml2::xml_find_first(node, xpath)
  # xml_find_first() never returns NULL; an absent node comes back as an
  # `xml_missing` object, so that is what has to be tested for.
  if (inherits(found_node, "xml_missing")) {
    return(NA_character_)
  }
  xml2::xml_text(found_node)
}
# Convert a single 'item' node into a one-row tibble.
#
# Arguments:
#   item - an xml2 node; each field is looked up as a descendant element
#          (XPath ".//<field>") of this node.
#
# Returns a tibble with one row and one column per field. The `seq_*`
# columns are converted to integer; every other column holds the element
# text as character. A field that is absent from `item` becomes NA.
extract_item_to_df_row <- function(item) {
  # Text of the descendant element called `field`, NA when absent
  text_of <- function(field) {
    extract_text_or_na(item, paste0(".//", field))
  }
  # Same lookup, converted to integer
  int_of <- function(field) {
    as.integer(text_of(field))
  }
  tibble::tibble(
    seq_orgao = int_of("seq_orgao"),
    dsc_orgao = text_of("dsc_orgao"),
    tip_orgao = text_of("tip_orgao"),
    seq_orgao_pai = int_of("seq_orgao_pai"),
    seq_tribunal_pai = int_of("seq_tribunal_pai"),
    cod_hierarquia = text_of("cod_hierarquia"),
    seq_cidade = int_of("seq_cidade"),
    sig_uf = text_of("sig_uf"),
    dsc_cidade = text_of("dsc_cidade"),
    cod_ibge = text_of("cod_ibge"),
    flg_ativo = text_of("flg_ativo"),
    # These three columns previously stored is.na(<text>), i.e. a TRUE/FALSE
    # "is missing" flag, instead of the value itself; keep the actual text.
    cep_serventia = text_of("cep_serventia"),
    endereco_serventia = text_of("endereco_serventia"),
    telefone_serventia = text_of("telefone_serventia"),
    dat_alteracao = text_of("dat_alteracao"),
    dsc_email_secretaria = text_of("dsc_email_secretaria")
  )
}
# Wrap the row extractor so that an item which raises an error produces a
# one-column placeholder row (erro = "erro") instead of aborting the run
safe <- purrr::possibly(
  extract_item_to_df_row,
  otherwise = tibble::tibble(erro = "erro")
)
# Use the future 'multicore' plan with 8 workers for the mapping step below
future::plan(future::multicore, workers = 8)
# Parse the response and collect every 'item' node in the document
item_nodes <- xml2::xml_find_all(xml2::read_xml(r), ".//item")
# One tibble row per item, extracted in parallel with a progress bar
item_rows <- furrr::future_map(item_nodes, safe, .progress = TRUE)
# Stack the rows into a single table and persist it
aux_varas_corporativo <- purrr::list_rbind(item_rows)
readr::write_rds(aux_varas_corporativo, "data-raw/aux_varas_corporativo.rds")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment