## jmclawson/unnest_without_caps.R

## unnest_without_caps.R
#' Tokenise a text column and drop words that only ever appear capitalised
#'
#' The text is tokenised twice with `tidytext::unnest_tokens()`: once with
#' case preserved, to learn which words occur capitalised and which occur in
#' lowercase, and once with the default lowercasing to build the result. Any
#' word whose lowercased form is seen only with a leading capital A-Z (and
#' never with a leading lowercase a-z) is removed from the result.
#'
#' Only ASCII letters are tested (`^[A-Z]` / `^[a-z]`), so tokens that begin
#' with anything else (digits, accented letters, ...) are never removed.
#'
#' @param df A data frame accepted by `tidytext::unnest_tokens()`.
#' @param column The column holding the text to tokenise, forwarded to
#'   `tidytext::unnest_tokens()` through `{{ }}`. Defaults to `"text"`.
#' @return The result of `tidytext::unnest_tokens(df, word, column)` with the
#'   rows for capitalised-only words filtered out; tokens are in the `word`
#'   column.
unnest_without_caps <- function(
    df,
    column = "text") {

  # Keep the original case so capitalisation can be inspected.
  cased <- df |>
    tidytext::unnest_tokens(word, {{ column }}, to_lower = FALSE)

  # Tokens that start with an ASCII capital letter.
  capitalised <- cased |>
    dplyr::filter(stringr::str_detect(word, "^[A-Z]")) |>
    dplyr::pull(word)

  # Tokens that start with an ASCII lowercase letter.
  lowercase <- cased |>
    dplyr::filter(stringr::str_detect(word, "^[a-z]")) |>
    dplyr::pull(word)

  # Lowercased forms of words never observed in lowercase in the text.
  only_caps <- base::setdiff(tolower(capitalised), lowercase)

  # Re-tokenise with the default lowercasing and drop those words.
  df |>
    tidytext::unnest_tokens(word, {{ column }}) |>
    dplyr::filter(!word %in% only_caps)
}
	# Tokenise a text column and drop words that only ever appear capitalised.
	#
	# NOTE(review): this looks like a second copy of the unnest_without_caps()
	# definition earlier in this file; when the file is sourced this later
	# definition is the one that stays bound to the name. Consider removing
	# one of the two.
	#
	# df:     data frame accepted by tidytext::unnest_tokens().
	# column: column holding the text to tokenise, forwarded to
	#         tidytext::unnest_tokens() through {{ }}. Defaults to "text".
	# Returns the default (lowercased) unnest_tokens() result with the rows
	# for capitalised-only words filtered out; tokens are in the `word` column.
	# Only ASCII letters are tested, so tokens starting with anything else
	# (digits, accented letters, ...) are never removed.
	unnest_without_caps <- function(
	    df,
	    column = "text") {

	  # Keep the original case so capitalisation can be inspected.
	  cased <- df |>
	    tidytext::unnest_tokens(word, {{ column }}, to_lower = FALSE)

	  # Tokens that start with an ASCII capital letter.
	  capitalised <- cased |>
	    dplyr::filter(stringr::str_detect(word, "^[A-Z]")) |>
	    dplyr::pull(word)

	  # Tokens that start with an ASCII lowercase letter.
	  lowercase <- cased |>
	    dplyr::filter(stringr::str_detect(word, "^[a-z]")) |>
	    dplyr::pull(word)

	  # Lowercased forms of words never observed in lowercase in the text.
	  only_caps <- base::setdiff(tolower(capitalised), lowercase)

	  # Re-tokenise with the default lowercasing and drop those words.
	  df |>
	    tidytext::unnest_tokens(word, {{ column }}) |>
	    dplyr::filter(!word %in% only_caps)
	}