# jtrecenti/pesquisa_sexo_relator.r

## pesquisa_sexo_relator.r
# arrumação dos dados para pesquisa

# Load the two spreadsheets exactly as delivered; the raw versions are reused
# at the end of the script to attach the scraped information.
sexual_raw <- readxl::read_excel("sexual.xlsx")
moral_raw <- readxl::read_excel("moral.xlsx")

# Moral-harassment cases: keep the process number as typed in
# `no_processo_original`, normalise `no_processo` with abjutils::clean_cnj(),
# then drop rows without a number and rows for which abjutils::check_dig()
# is not TRUE (presumably an invalid CNJ check digit).
moral <- janitor::clean_names(moral_raw) |>
  dplyr::mutate(no_processo_original = no_processo) |>
  dplyr::mutate(no_processo = abjutils::clean_cnj(no_processo)) |>
  dplyr::filter(!is.na(no_processo)) |>
  dplyr::filter(purrr::map_lgl(no_processo, \(cnj) abjutils::check_dig(cnj)))

# Sexual-harassment cases: same cleaning steps as above.
sexual <- janitor::clean_names(sexual_raw) |>
  dplyr::mutate(no_processo_original = no_processo) |>
  dplyr::mutate(no_processo = abjutils::clean_cnj(no_processo)) |>
  dplyr::filter(!is.na(no_processo)) |>
  dplyr::filter(purrr::map_lgl(no_processo, \(cnj) abjutils::check_dig(cnj)))

# Stack both tables; `tipo` records which spreadsheet each row came from.
# Author's note: 142 + 170 = 312 (presumably the row counts of each table).
todos <- dplyr::bind_rows(moral = moral, sexual = sexual, .id = "tipo")

# essa parte aqui precisa de conhecimentos de raspagem de dados
# e resolução de captchas

# Look up the internal id of a process in the TRT2 PJe consultation API.
#
# n_processo: process number, interpolated as-is into the request URL.
#
# Returns the `id` field of the first element of the parsed response as a
# character string, or "" when that field is absent.
pegar_id_processo <- function(n_processo) {
  endpoint <- glue::glue(
    "https://pje.trt2.jus.br/pje-consulta-api/api/processos/dadosbasicos/{n_processo}"
  )
  # The header value 2 presumably selects the second instance -- confirm
  # against the API documentation.
  resposta <- httr::GET(endpoint, httr::add_headers("X-Grau-Instancia" = 2))
  id <- purrr::pluck(httr::content(resposta), 1, "id")
  if (is.null(id)) {
    return("")
  }
  as.character(id)
}

# Query the API once per row to obtain the internal id of every process
# ("" when pegar_id_processo() finds none).
todos_id_processo <- dplyr::mutate(
  todos,
  id_processo = purrr::map_chr(
    .x = no_processo,
    .f = pegar_id_processo,
    .progress = TRUE
  )
)

# Only processes with an id can be searched; keep one row per id.
todos_id_processo_pesquisar <- dplyr::distinct(
  dplyr::filter(todos_id_processo, id_processo != ""),
  id_processo,
  .keep_all = TRUE
)

# Author's note: 289 rows (presumably the size of the table above),
# i.e. 289 / 312 = 92.6282%.

# Request a captcha challenge for a process and save its image to disk.
#
# path: directory where the captcha image is written (created when missing).
# id_processo: internal process id, interpolated into the captcha URL.
#
# Returns a list with `token` (the `tokenDesafio` field of the response) and
# `f_captcha` (path of the image file that was written).
captcha_access_trt <- function(path, id_processo) {
  u_captcha <- glue::glue(
    "https://pje.trt2.jus.br/pje-consulta-api/api/captcha?idProcesso={id_processo}"
  )
  # Without the directory writeBin() fails, and the caller's possibly()
  # wrapper would hide that failure.
  dir.create(path, showWarnings = FALSE, recursive = TRUE)
  # file_temp() generates a fresh name on every call, so there is nothing to
  # reuse: always download. The previous `if (!file.exists(...))` guard left
  # `token` undefined whenever its branch was skipped.
  f_captcha <- fs::file_temp(tmp_dir = path, ext = ".jpeg", pattern = "trt")
  # Parse the response body once and read both fields from it.
  desafio <- httr::content(httr::GET(u_captcha))
  desafio |>
    purrr::pluck("imagem") |>
    base64enc::base64decode() |>
    writeBin(f_captcha)
  list(token = purrr::pluck(desafio, "tokenDesafio"), f_captcha = f_captcha)
}

# Download the details of a process to "json/<id_processo>.json".
#
# obj: list with a `token` element (the captcha challenge token).
# label: answer to the captcha, sent as the `resposta` query parameter.
# id_processo: internal process id.
#
# Returns the path of the JSON file. When the file already exists no request
# is made.
pegar_json <- function(obj, label, id_processo) {
  f <- glue::glue("json/{id_processo}.json")
  if (!file.exists(f)) {
    # write_disk() cannot create the folder itself, and the resulting error
    # would be hidden by the possibly() wrapper used by the caller.
    dir.create("json", showWarnings = FALSE, recursive = TRUE)
    u_consulta <- glue::glue(
      "https://pje.trt2.jus.br/pje-consulta-api/api/processos/{id_processo}"
    )
    # Called for its side effect only: the response body is streamed to `f`,
    # whatever the server answered.
    httr::GET(
      u_consulta,
      query = list(tokenDesafio = obj$token, resposta = label),
      httr::add_headers(`X-Grau-Instancia` = 2),
      httr::write_disk(f, overwrite = TRUE)
    )
  }
  f
}

# Load the captcha model identified by "trt" once, at top level;
# baixar_infos() reads `model_trt` from the global environment.
model_trt <- captcha::captcha_load_model("trt")

# Fetch the JSON of one process: request a captcha, solve it with
# `model_trt`, and download the process details.
#
# id_processo: internal process id.
#
# Returns the value of pegar_json() (the JSON path), or invisible NULL when
# "json/<id_processo>.json" already exists.
baixar_infos <- function(id_processo) {
  destino <- glue::glue("json/{id_processo}.json")
  if (file.exists(destino)) {
    return(invisible(NULL))
  }
  desafio <- captcha_access_trt("captcha", id_processo)
  resposta <- captcha::decrypt(desafio$f_captcha, model_trt)
  pegar_json(desafio, resposta, id_processo)
}

# Wrap the downloader so that an error on one process yields NULL instead of
# aborting the whole run.
safe <- purrr::possibly(baixar_infos, otherwise = NULL)

# Run the downloader for every searchable id; only the files written to
# disk matter here.
purrr::walk(
  .x = todos_id_processo_pesquisar[["id_processo"]],
  .f = safe,
  .progress = TRUE
)

# Read every downloaded JSON into a single table with one row per file.
# A file carrying a `mensagem` field is flagged as a captcha error;
# otherwise the `pessoaRelator` field is kept as `relator`.
# Both columns are always created (filled with NA where they do not apply):
# previously `erro` or `relator` only existed when at least one file of the
# corresponding kind was found, which made the steps below fail with
# "object not found".
aux_result <- fs::dir_ls("json") |>
  purrr::map(\(x) {
    res <- jsonlite::read_json(x)
    if (!is.null(res$mensagem)) {
      tibble::tibble(relator = NA_character_, erro = "erro captcha")
    } else {
      tibble::tibble(relator = res$pessoaRelator, erro = NA_character_)
    }
  }) |>
  purrr::list_rbind(names_to = "json_file")

# Delete the files flagged as errors so that a later run of the download
# step fetches them again (it skips files that already exist).
aux_result |>
  dplyr::filter(erro == "erro captcha") |>
  dplyr::pull(json_file) |>
  fs::file_delete()


# Infer the gender of each relator from the name and link the result back to
# the process number as originally typed in the spreadsheets.
aux_result_sexo <- aux_result |>
  dplyr::mutate(
    relator = stringr::str_squish(relator),
    sexo_relator = genderBR::get_gender(relator),
    # Manual overrides for specific names (the author looked them up
    # online); every other name keeps the value from genderBR.
    sexo_relator = dplyr::case_match(
      relator,
      c(
        "DAMIA AVOLI",
        "EROTILDE RIBEIRO DOS SANTOS",
        "LYCANTHIA CAROLINA RAMAGE",
        "KYONG MI LEE"
      ) ~ "Female",
      "ROVIRSO APARECIDO BOLDO" ~ "Male",
      .default = sexo_relator
    ),
    # The JSON files are named after the process id.
    id_processo = tools::file_path_sans_ext(basename(json_file))
  ) |>
  dplyr::inner_join(
    todos_id_processo_pesquisar |>
      dplyr::select(id_processo, no_processo_original),
    by = "id_processo"
  ) |>
  dplyr::select(!c(json_file, id_processo))


# Attach the relator columns to the untouched spreadsheets, matching the
# original "Nº Processo" column against the process number saved before
# cleaning. Rows without a match get NA in the new columns.
moral_final <- dplyr::left_join(
  x = moral_raw,
  y = aux_result_sexo,
  by = c("Nº Processo" = "no_processo_original")
)

sexual_final <- dplyr::left_join(
  x = sexual_raw,
  y = aux_result_sexo,
  by = c("Nº Processo" = "no_processo_original")
)

# Export the enriched spreadsheets.
writexl::write_xlsx(x = moral_final, path = "moral_com_relatores.xlsx")
writexl::write_xlsx(x = sexual_final, path = "sexual_com_relatores.xlsx")

# agora alguns gráficos

# Moral harassment: share of rows with provimento_do_assedio == "Sim" for
# each combination of relator gender and presumed victim gender. Rows with
# unknown relator gender or undefined victim gender are left out.
moral_final |>
  janitor::clean_names() |>
  dplyr::filter(!is.na(sexo_relator)) |>
  dplyr::filter(sexo_presumido_da_vitima != "Não definido no processo") |>
  dplyr::group_by(sexo_relator, sexo_presumido_da_vitima) |>
  dplyr::summarise(
    n = dplyr::n(),
    prop_provimento = mean(provimento_do_assedio == "Sim"),
    .groups = "drop"
  ) |>
  ggplot2::ggplot(
    mapping = ggplot2::aes(
      x = prop_provimento,
      y = sexo_presumido_da_vitima,
      fill = sexo_relator
    )
  ) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::scale_x_continuous(labels = scales::percent) +
  ggplot2::scale_fill_viridis_d(begin = .3, end = .7, option = 2) +
  ggplot2::labs(
    x = "Proporção de provimento",
    y = "Gênero da vítima",
    fill = "Gênero do relator"
  ) +
  # The complete theme goes first so the legend tweak is not overwritten.
  ggplot2::theme_minimal(base_size = 15) +
  ggplot2::theme(legend.position = "bottom")


# Sexual harassment: share of rows with provimento_do_assedio == "Sim" by
# relator gender. Rows with unknown relator gender are left out.
sexual_final |>
  janitor::clean_names() |>
  dplyr::filter(!is.na(sexo_relator)) |>
  dplyr::group_by(sexo_relator) |>
  dplyr::summarise(
    n = dplyr::n(),
    prop_provimento = mean(provimento_do_assedio == "Sim"),
    .groups = "drop"
  ) |>
  ggplot2::ggplot(
    mapping = ggplot2::aes(x = prop_provimento, y = sexo_relator)
  ) +
  ggplot2::geom_col(
    # Single fixed colour: the second of two colours from the same palette
    # settings used in the moral-harassment chart.
    fill = viridis::viridis(2, begin = .3, end = .7, option = 2)[2],
    position = "dodge"
  ) +
  ggplot2::scale_x_continuous(labels = scales::percent) +
  ggplot2::labs(
    x = "Proporção de provimento",
    y = "Gênero do relator"
  ) +
  # The complete theme goes first so the legend tweak is not overwritten.
  ggplot2::theme_minimal(base_size = 15) +
  ggplot2::theme(legend.position = "bottom")
	# arrumação dos dados para pesquisa

	# Load the two spreadsheets exactly as delivered; the raw versions are
	# reused further down to attach the scraped information.
	moral_raw <- readxl::read_excel("moral.xlsx")
	sexual_raw <- readxl::read_excel("sexual.xlsx")

	# Moral-harassment cases: keep the process number as typed in
	# `no_processo_original`, normalise `no_processo` with
	# abjutils::clean_cnj(), then drop rows without a number and rows for
	# which abjutils::check_dig() is not TRUE (presumably an invalid CNJ
	# check digit).
	# The pipes here were escaped as "\|>", which R cannot parse.
	moral <- moral_raw |>
	  janitor::clean_names() |>
	  dplyr::mutate(
	    no_processo_original = no_processo,
	    no_processo = abjutils::clean_cnj(no_processo)
	  ) |>
	  dplyr::filter(
	    !is.na(no_processo)
	  ) |>
	  dplyr::filter(
	    purrr::map_lgl(no_processo, abjutils::check_dig)
	  )

	# Sexual-harassment cases: same cleaning steps as above.
	sexual <- sexual_raw |>
	  janitor::clean_names() |>
	  dplyr::mutate(
	    no_processo_original = no_processo,
	    no_processo = abjutils::clean_cnj(no_processo)
	  ) |>
	  dplyr::filter(
	    !is.na(no_processo)
	  ) |>
	  dplyr::filter(
	    purrr::map_lgl(no_processo, abjutils::check_dig)
	  )

	# Stack both tables; `tipo` records which spreadsheet each row came from.
	# Author's note: 142 + 170 = 312 (presumably the row counts of each table).
	todos <- dplyr::bind_rows(list(moral = moral, sexual = sexual), .id = "tipo")

	# essa parte aqui precisa de conhecimentos de raspagem de dados
	# e resolução de captchas

	# Look up the internal id of a process in the TRT2 PJe consultation API.
	#
	# n_processo: process number, interpolated as-is into the request URL.
	#
	# Returns the `id` field of the first element of the parsed response as a
	# character string, or "" when that field is absent.
	pegar_id_processo <- function(n_processo) {
	  u <- glue::glue(
	    "https://pje.trt2.jus.br/pje-consulta-api/api/processos/dadosbasicos/{n_processo}"
	  )
	  # The header value 2 presumably selects the second instance -- confirm
	  # against the API documentation.
	  r <- httr::GET(
	    u,
	    httr::add_headers("X-Grau-Instancia" = 2)
	  )
	  # The pipe here was escaped as "\|>", which R cannot parse.
	  res <- httr::content(r) |>
	    purrr::pluck(1, "id")
	  if (is.null(res)) {
	    return("")
	  }
	  as.character(res)
	}

	# Query the API once per row to obtain the internal id of every process
	# ("" when pegar_id_processo() finds none).
	# The pipes in this section were escaped as "\|>", which R cannot parse.
	todos_id_processo <- todos |>
	  dplyr::mutate(
	    id_processo = purrr::map_chr(
	      no_processo, pegar_id_processo,
	      .progress = TRUE
	    )
	  )

	# Only processes with an id can be searched; keep one row per id.
	todos_id_processo_pesquisar <- todos_id_processo |>
	  dplyr::filter(id_processo != "") |>
	  dplyr::distinct(id_processo, .keep_all = TRUE)

	# Author's note: 289 rows (presumably the size of the table above),
	# i.e. 289 / 312 = 92.6282%.

	# Request a captcha challenge for a process and save its image to disk.
	#
	# path: directory where the captcha image is written (created when
	#   missing).
	# id_processo: internal process id, interpolated into the captcha URL.
	#
	# Returns a list with `token` (the `tokenDesafio` field of the response)
	# and `f_captcha` (path of the image file that was written).
	captcha_access_trt <- function(path, id_processo) {
	  u_captcha <- glue::glue(
	    "https://pje.trt2.jus.br/pje-consulta-api/api/captcha?idProcesso={id_processo}"
	  )
	  # Without the directory writeBin() fails, and the caller's possibly()
	  # wrapper would hide that failure.
	  dir.create(path, showWarnings = FALSE, recursive = TRUE)
	  # file_temp() generates a fresh name on every call, so there is nothing
	  # to reuse: always download. The previous `if (!file.exists(...))` guard
	  # left `token` undefined whenever its branch was skipped.
	  f_captcha <- fs::file_temp(tmp_dir = path, ext = ".jpeg", pattern = "trt")
	  # Parse the response body once and read both fields from it.
	  # The pipes here were escaped as "\|>", which R cannot parse.
	  desafio <- httr::content(httr::GET(u_captcha))
	  desafio |>
	    purrr::pluck("imagem") |>
	    base64enc::base64decode() |>
	    writeBin(f_captcha)
	  list(token = purrr::pluck(desafio, "tokenDesafio"), f_captcha = f_captcha)
	}

	# Download the details of a process to "json/<id_processo>.json".
	#
	# obj: list with a `token` element (the captcha challenge token).
	# label: answer to the captcha, sent as the `resposta` query parameter.
	# id_processo: internal process id.
	#
	# Returns the path of the JSON file. When the file already exists no
	# request is made.
	pegar_json <- function(obj, label, id_processo) {
	  f <- glue::glue("json/{id_processo}.json")
	  if (!file.exists(f)) {
	    # write_disk() cannot create the folder itself, and the resulting
	    # error would be hidden by the possibly() wrapper used by the caller.
	    dir.create("json", showWarnings = FALSE, recursive = TRUE)
	    u_consulta <- glue::glue(
	      "https://pje.trt2.jus.br/pje-consulta-api/api/processos/{id_processo}"
	    )
	    # Called for its side effect only: the response body is streamed to
	    # `f`, whatever the server answered.
	    httr::GET(
	      u_consulta,
	      query = list(tokenDesafio = obj$token, resposta = label),
	      httr::add_headers(`X-Grau-Instancia` = 2),
	      httr::write_disk(f, overwrite = TRUE)
	    )
	  }
	  f
	}

	# Load the captcha model identified by "trt" once, at top level;
	# baixar_infos() reads `model_trt` from the global environment.
	model_trt <- captcha::captcha_load_model("trt")

	# Fetch the JSON of one process: request a captcha, solve it with
	# `model_trt`, and download the process details.
	#
	# id_processo: internal process id.
	#
	# Returns the value of pegar_json() (the JSON path), or invisible NULL
	# when "json/<id_processo>.json" already exists.
	baixar_infos <- function(id_processo) {
	  destino <- glue::glue("json/{id_processo}.json")
	  if (file.exists(destino)) {
	    return(invisible(NULL))
	  }
	  desafio <- captcha_access_trt("captcha", id_processo)
	  resposta <- captcha::decrypt(desafio$f_captcha, model_trt)
	  pegar_json(desafio, resposta, id_processo)
	}

	# Wrap the downloader so that an error on one process yields NULL instead
	# of aborting the whole run.
	safe <- purrr::possibly(baixar_infos, otherwise = NULL)

	# Run the downloader for every searchable id; only the files written to
	# disk matter here.
	purrr::walk(
	  .x = todos_id_processo_pesquisar[["id_processo"]],
	  .f = safe,
	  .progress = TRUE
	)

	# Read every downloaded JSON into a single table with one row per file.
	# A file carrying a `mensagem` field is flagged as a captcha error;
	# otherwise the `pessoaRelator` field is kept as `relator`.
	# Both columns are always created (filled with NA where they do not
	# apply): previously `erro` or `relator` only existed when at least one
	# file of the corresponding kind was found, which made the steps below
	# fail with "object not found".
	# The pipes here were also escaped as "\|>", which R cannot parse.
	aux_result <- fs::dir_ls("json") |>
	  purrr::map(\(x) {
	    res <- jsonlite::read_json(x)
	    if (!is.null(res$mensagem)) {
	      tibble::tibble(relator = NA_character_, erro = "erro captcha")
	    } else {
	      tibble::tibble(relator = res$pessoaRelator, erro = NA_character_)
	    }
	  }) |>
	  purrr::list_rbind(names_to = "json_file")

	# Delete the files flagged as errors so that a later run of the download
	# step fetches them again (it skips files that already exist).
	aux_result |>
	  dplyr::filter(erro == "erro captcha") |>
	  dplyr::pull(json_file) |>
	  fs::file_delete()


	# Infer the gender of each relator from the name and link the result back
	# to the process number as originally typed in the spreadsheets.
	# The pipes here were escaped as "\|>", which R cannot parse.
	aux_result_sexo <- aux_result |>
	  dplyr::mutate(relator = stringr::str_squish(relator)) |>
	  dplyr::mutate(
	    sexo_relator = genderBR::get_gender(relator),
	    sexo_relator = dplyr::case_match(
	      # Manual overrides for specific names (the author looked them up
	      # online); every other name keeps the value from genderBR.
	      relator,
	      "DAMIA AVOLI" ~ "Female",
	      "EROTILDE RIBEIRO DOS SANTOS" ~ "Female",
	      "ROVIRSO APARECIDO BOLDO" ~ "Male",
	      "LYCANTHIA CAROLINA RAMAGE" ~ "Female",
	      "KYONG MI LEE" ~ "Female",
	      .default = sexo_relator
	    ),
	    # The JSON files are named after the process id.
	    id_processo = tools::file_path_sans_ext(basename(json_file))
	  ) |>
	  dplyr::inner_join(
	    dplyr::select(
	      todos_id_processo_pesquisar, id_processo, no_processo_original
	    ),
	    by = "id_processo"
	  ) |>
	  dplyr::select(-json_file, -id_processo)



	# Attach the relator columns to the untouched spreadsheets, matching the
	# original "Nº Processo" column against the process number saved before
	# cleaning. Rows without a match get NA in the new columns.
	# The pipes here were escaped as "\|>", which R cannot parse.
	sexual_final <- sexual_raw |>
	  dplyr::left_join(
	    aux_result_sexo,
	    by = c("Nº Processo" = "no_processo_original")
	  )

	moral_final <- moral_raw |>
	  dplyr::left_join(
	    aux_result_sexo,
	    by = c("Nº Processo" = "no_processo_original")
	  )

	# Export the enriched spreadsheets.
	writexl::write_xlsx(moral_final, "moral_com_relatores.xlsx")
	writexl::write_xlsx(sexual_final, "sexual_com_relatores.xlsx")

	# agora alguns gráficos

	# Moral harassment: share of rows with provimento_do_assedio == "Sim" for
	# each combination of relator gender and presumed victim gender. Rows with
	# unknown relator gender or undefined victim gender are left out.
	# The pipes here were escaped as "\|>", which R cannot parse.
	moral_final |>
	  janitor::clean_names() |>
	  dplyr::filter(
	    !is.na(sexo_relator),
	    sexo_presumido_da_vitima != "Não definido no processo"
	  ) |>
	  dplyr::group_by(
	    sexo_relator,
	    sexo_presumido_da_vitima
	  ) |>
	  dplyr::summarise(
	    n = dplyr::n(),
	    prop_provimento = mean(provimento_do_assedio == "Sim"),
	    .groups = "drop"
	  ) |>
	  ggplot2::ggplot() +
	  ggplot2::aes(
	    x = prop_provimento, fill = sexo_relator, y = sexo_presumido_da_vitima
	  ) +
	  ggplot2::geom_col(position = "dodge") +
	  ggplot2::theme_minimal(15) +
	  ggplot2::scale_x_continuous(labels = scales::percent) +
	  ggplot2::scale_fill_viridis_d(begin = .3, end = .7, option = 2) +
	  ggplot2::labs(
	    x = "Proporção de provimento",
	    y = "Gênero da vítima",
	    fill = "Gênero do relator"
	  ) +
	  ggplot2::theme(
	    legend.position = "bottom"
	  )


	# Sexual harassment: share of rows with provimento_do_assedio == "Sim" by
	# relator gender. Rows with unknown relator gender are left out.
	# The pipes here were escaped as "\|>", which R cannot parse.
	sexual_final |>
	  janitor::clean_names() |>
	  dplyr::filter(
	    !is.na(sexo_relator)
	  ) |>
	  dplyr::group_by(sexo_relator) |>
	  dplyr::summarise(
	    n = dplyr::n(),
	    prop_provimento = mean(provimento_do_assedio == "Sim"),
	    .groups = "drop"
	  ) |>
	  ggplot2::ggplot() +
	  ggplot2::aes(
	    x = prop_provimento, y = sexo_relator
	  ) +
	  ggplot2::geom_col(
	    # Single fixed colour: the second of two colours from the same
	    # palette settings used in the moral-harassment chart.
	    fill = viridis::viridis(2, begin = .3, end = .7, option = 2)[2],
	    position = "dodge"
	  ) +
	  ggplot2::theme_minimal(15) +
	  ggplot2::scale_x_continuous(labels = scales::percent) +
	  ggplot2::labs(
	    x = "Proporção de provimento",
	    y = "Gênero do relator"
	  ) +
	  ggplot2::theme(
	    legend.position = "bottom"
	  )