Skip to content

Instantly share code, notes, and snippets.

@hfrick
Created June 22, 2023 18:14
Show Gist options
  • Save hfrick/1efb20e8d0f1e3d53efdfb8e0646ce91 to your computer and use it in GitHub Desktop.
Save hfrick/1efb20e8d0f1e3d53efdfb8e0646ce91 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(tidymodels)
# Read the raw sea otter morphometrics file and normalise the column names
# with janitor::clean_names() so they can be referenced consistently below.
otters_raw <- read_csv("seot_morphometricsReproStatus_ak_monson.csv") %>%
  janitor::clean_names()

# Build the analysis table:
#   * -9 is used as a missing-value code in the numeric measurements; recode
#     it to NA in one place instead of repeating the rule per column,
#   * keep only rows with `recap == 0`
#     (NOTE(review): presumably first captures only -- confirm),
#   * keep the modelling columns under shorter names.
otters <- otters_raw %>%
  mutate(
    across(
      c(final_age, weight, mean_tail_lgth, mean_lgth, mean_girth, paw),
      function(x) na_if(x, -9)
    )
  ) %>%
  filter(recap == 0) %>%
  select(
    age = final_age,
    sex,
    weight,
    tail_length = mean_tail_lgth,
    length = mean_lgth,
    girth = mean_girth,
    paw
  )
# Missingness check: tabulate NA vs. non-NA for every column of `otters`.
# age, sex, weight: an acceptable number of missing values
count(otters, is.na(age))
count(otters, is.na(sex))
count(otters, is.na(weight))
# length: about half of the values are missing
count(otters, is.na(length))
# tail_length, girth, paw: author's note -- "where did my data go?"
count(otters, is.na(tail_length))
count(otters, is.na(girth))
count(otters, is.na(paw))
# predict missing length --------------------------------------------------
# Modelling data for the question "is `length` missing?":
#   * `length_missing` is a factor whose first level is TRUE,
#   * the sex code "U" is recoded to NA,
#   * only age, weight, sex and the outcome are kept, and rows with an NA in
#     any of them are dropped.
otters_na <- otters %>%
  mutate(
    # `levels` spelled out in full rather than relying on partial matching
    length_missing = factor(is.na(length), levels = c(TRUE, FALSE)),
    sex = na_if(sex, "U")
  ) %>%
  select(age, weight, sex, length_missing) %>%
  drop_na()

# Exploratory plots of the outcome against each predictor.
# Counts of missing / non-missing length by age
otters_na %>%
  ggplot() +
  geom_bar(aes(age, fill = length_missing))
# Same bars, rescaled to proportions within each age
otters_na %>%
  ggplot() +
  geom_bar(aes(age, fill = length_missing), position = "fill")
# Weight histograms, one panel per outcome level
otters_na %>%
  ggplot() +
  geom_histogram(aes(weight)) +
  facet_grid(length_missing ~ .)
# Proportion of missing / non-missing length within each sex
otters_na %>%
  ggplot() +
  geom_bar(aes(sex, fill = length_missing), position = "fill")
# Fix the RNG state so the split and the resamples below are reproducible.
set.seed(403)

# Train/test split, stratified on the outcome.
otters_split <- initial_split(otters_na, strata = length_missing)
otters_train <- training(otters_split)
otters_test <- testing(otters_split)

# Cross-validation folds on the training set. Stratify on the outcome here
# as well, so the folds are built the same way as the initial split.
otters_folds <- vfold_cv(otters_train, strata = length_missing)

# Logistic regression, evaluated across the folds.
lr_wflow <- workflow(
  length_missing ~ age + weight + sex,
  logistic_reg()
)
lr_fit <- fit_resamples(lr_wflow, otters_folds)
collect_metrics(lr_fit)

# Random forest classifier, evaluated on the same folds for comparison.
rf_wflow <- workflow(
  length_missing ~ age + weight + sex,
  rand_forest(mode = "classification")
)
rf_fit <- fit_resamples(rf_wflow, otters_folds)
collect_metrics(rf_fit)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment