Skip to content

Instantly share code, notes, and snippets.

@avallecam
Created January 24, 2023 13:52
Show Gist options
  • Save avallecam/433f51c6ef8a5ec88028dec9b0dd39f9 to your computer and use it in GitHub Desktop.
Save avallecam/433f51c6ef8a5ec88028dec9b0dd39f9 to your computer and use it in GitHub Desktop.
# Handle missing observations for factor variables
# Install pacman on first use, then load the tidyverse through it.
# requireNamespace() only checks that pacman is available; it does not attach
# the package, which is not needed because every call is written pacman::.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(tidyverse)
# Missing values coded as NA ------------------------------------------------
# Start from a character column where missing observations are real NA values.
f1 <- tibble(
  variable = c("a", "a", NA, NA, "a", "b", NA, "c", "a", "c", "b")
)
f1
#> # A tibble: 11 × 1
#> variable
#> <chr>
#> 1 a
#> 2 a
#> 3 <NA>
#> 4 <NA>
#> 5 a
#> 6 b
#> 7 <NA>
#> 8 c
#> 9 a
#> 10 c
#> 11 b
count(f1, variable)
#> # A tibble: 4 × 2
#> variable n
#> <chr> <int>
#> 1 a 4
#> 2 b 2
#> 3 c 2
#> 4 <NA> 3
# Convert to a factor: the NA rows are still counted, but NA does not become
# one of the factor levels (see levels() below).
f2 <- mutate(f1, variable = as.factor(variable))
f2
#> # A tibble: 11 × 1
#> variable
#> <fct>
#> 1 a
#> 2 a
#> 3 <NA>
#> 4 <NA>
#> 5 a
#> 6 b
#> 7 <NA>
#> 8 c
#> 9 a
#> 10 c
#> 11 b
count(f2, variable)
#> # A tibble: 4 × 2
#> variable n
#> <fct> <int>
#> 1 a 4
#> 2 b 2
#> 3 c 2
#> 4 <NA> 3
levels(f2$variable)
#> [1] "a" "b" "c"
# When missing values are not (all) coded as NA -----------------------------
# Here some missing observations are real NA values and others are recorded
# with the sentinel string "missing".
f1 <- tibble(
  variable = c(
    "a", "a", NA, NA, "a", "b", NA, "missing", "a", "missing", "b"
  )
)
f1
#> # A tibble: 11 × 1
#> variable
#> <chr>
#> 1 a
#> 2 a
#> 3 <NA>
#> 4 <NA>
#> 5 a
#> 6 b
#> 7 <NA>
#> 8 missing
#> 9 a
#> 10 missing
#> 11 b
count(f1, variable)
#> # A tibble: 4 × 2
#> variable n
#> <chr> <int>
#> 1 a 4
#> 2 b 2
#> 3 missing 2
#> 4 <NA> 3
# Converting directly keeps "missing" as an ordinary factor level, separate
# from the NA rows.
f2 <- mutate(f1, variable = as.factor(variable))
count(f2, variable)
#> # A tibble: 4 × 2
#> variable n
#> <fct> <int>
#> 1 a 4
#> 2 b 2
#> 3 missing 2
#> 4 <NA> 3
levels(f2$variable)
#> [1] "a" "b" "missing"
# Recode the sentinel string "missing" to NA before building the factor, so
# that it does not end up as a factor level.
pacman::p_load(naniar)
f3 <- naniar::replace_with_na(f1, replace = list(variable = "missing"))
f3
#> # A tibble: 11 × 1
#> variable
#> <chr>
#> 1 a
#> 2 a
#> 3 <NA>
#> 4 <NA>
#> 5 a
#> 6 b
#> 7 <NA>
#> 8 <NA>
#> 9 a
#> 10 <NA>
#> 11 b
f4 <- mutate(f3, variable = as.factor(variable))
f4
#> # A tibble: 11 × 1
#> variable
#> <fct>
#> 1 a
#> 2 a
#> 3 <NA>
#> 4 <NA>
#> 5 a
#> 6 b
#> 7 <NA>
#> 8 <NA>
#> 9 a
#> 10 <NA>
#> 11 b
count(f4, variable)
#> # A tibble: 3 × 2
#> variable n
#> <fct> <int>
#> 1 a 4
#> 2 b 2
#> 3 <NA> 5
levels(f4$variable)
#> [1] "a" "b"
# Make missing values explicit ----------------------------------------------
# f2 (built in the previous section) holds both NA values and a "missing"
# level.
f2
#> # A tibble: 11 × 1
#> variable
#> <fct>
#> 1 a
#> 2 a
#> 3 <NA>
#> 4 <NA>
#> 5 a
#> 6 b
#> 7 <NA>
#> 8 missing
#> 9 a
#> 10 missing
#> 11 b
# Turn the NA values into the "missing" level so that every missing
# observation shares one explicit level.
# fct_na_value_to_level() replaces fct_explicit_na(), which is deprecated
# since forcats 1.0.0 and warns on every call.
f3 <- f2 %>%
  mutate(variable = fct_na_value_to_level(variable, level = "missing"))
f3
#> # A tibble: 11 × 1
#> variable
#> <fct>
#> 1 a
#> 2 a
#> 3 missing
#> 4 missing
#> 5 a
#> 6 b
#> 7 missing
#> 8 missing
#> 9 a
#> 10 missing
#> 11 b
f3 %>% count(variable)
#> # A tibble: 3 × 2
#> variable n
#> <fct> <int>
#> 1 a 4
#> 2 b 2
#> 3 missing 5
levels(f3$variable)
#> [1] "a" "b" "missing"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment