Created
May 23, 2025 00:29
-
-
Save popovs/6536a926b5bcbeb0327d9d7f363f7c9c to your computer and use it in GitHub Desktop.
Identifying spikes in animal tracks using the isolation forest algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| tar_load(elk) | |
| elk <- elk[elk$animal_id == "18-11034",] | |
| elk <- elk[elk$month == 10, ] | |
| elk <- elk[elk$year == 2020, ] | |
| plot(elk) | |
| hist(elk$step_length_m) | |
| plot(density(elk$step_length_m)) | |
| plot(density(elk$spd_kph)) | |
| elk$spd_kph | |
| plot(density(elk$NSD)) | |
| # Try isotree to flag outlier in terms of step length and/or spd? | |
| isodata <- as.data.frame(elk[["step_length_m"]]) | |
| names(isodata) <- "step_length_m" | |
| if_model <- isotree::isolation.forest(isodata) | |
| scores <- isotree::predict.isolation_forest(if_model, isodata, type = "avg_depth") | |
| hist(scores) | |
| plot(isodata$step_length_m, scores) | |
| elk$iso_score <- scores | |
| elk$iso_cutoff <- elk$iso_score < 4 | |
| plot(elk["iso_score"]) | |
| plot(elk["iso_cutoff"]) | |
| plot(elk[elk$iso_cutoff == TRUE, "iso_cutoff"]) | |
| plot(elk$dttm, elk$iso_score) | |
| plot(elk$dttm, elk$iso_cutoff) | |
| plot(elk["dttm"]) | |
| plot(elk$dttm, elk$step_length_m) | |
| # Now try the same thing but with spd_kph. Since speed and | |
| # step length are derived from the same thing it should be itentical | |
| isodata <- sf::st_drop_geometry(elk[,c("spd_kph")]) | |
| if_model <- isotree::isolation.forest(isodata) | |
| scores <- isotree::predict.isolation_forest(if_model, isodata, type = "avg_depth") | |
| hist(scores) | |
| plot(isodata$spd_kph, scores) | |
| elk$iso_score <- scores | |
| elk$iso_cutoff <- elk$iso_score < 4 | |
| plot(elk["iso_score"]) | |
| plot(elk["iso_cutoff"]) | |
| plot(elk[elk$iso_cutoff == TRUE, "iso_cutoff"]) | |
| plot(elk$dttm, elk$iso_score) | |
| plot(elk$dttm, elk$iso_cutoff) | |
| plot(elk$dttm, elk$spd_kph) | |
| ## Now try on entire dataset | |
| tar_load(elk) | |
| isodata <- sf::st_drop_geometry(elk[,c("animal_id", "step_length_m")]) | |
| if_model <- isotree::isolation.forest(isodata, ndim = 2) | |
| scores <- isotree::predict.isolation_forest(if_model, isodata, type = "avg_depth") | |
| hist(scores) | |
| plot(isodata$step_length_m, scores) | |
| plot(log(isodata$step_length_m), scores) | |
| quantile(scores) # looks like 4 is actually a great cutoff | |
| # Add it to the elk dataset | |
| elk$isoscore <- scores | |
| elk$iso_pass <- elk$isoscore > 4 # use 4 as the cutoff | |
| library(ggplot2) | |
| ggplot() + | |
| geom_sf(data = elk, | |
| aes(color = iso_pass, | |
| shape = iso_pass)) | |
| ggplot() + | |
| geom_sf(data = elk, | |
| aes(color = isoscore)) | |
| # What about quantile by animal_id? | |
| x <- elk |> | |
| sf::st_drop_geometry() |> | |
| dplyr::select(animal_id, isoscore) |> | |
| dplyr::group_by(animal_id) |> | |
| dplyr::summarise(tibble::as_tibble_row(quantile(isoscore))) | |
| names(x) <- c("animal_id", "q0", "q25", "q50", "q75", "q100") | |
| hist(x$q0) | |
| plot(density(x$q0)) | |
| median(x$q0) | |
| elk$iso_pass <- elk$isoscore > 7 # use the median q0 as the isopass score? | |
| ggplot() + | |
| geom_sf(data = elk, | |
| aes(color = iso_pass, | |
| shape = iso_pass)) | |
| elk_linestrings <- elk |> | |
| dplyr::group_by(animal_id) |> | |
| dplyr::arrange(dttm) |> | |
| dplyr::summarize(do_union = FALSE, | |
| .groups = 'drop') |> | |
| sf::st_cast("LINESTRING") | |
| ggplot() + | |
| geom_sf(data = elk_linestrings, | |
| alpha = 0.1) + | |
| geom_sf(data = elk[elk$iso_pass == TRUE, ], | |
| aes(color = animal_id), | |
| shape = 20, | |
| alpha = 0.3, | |
| show.legend = FALSE) + | |
| geom_sf(data = elk[elk$iso_pass == FALSE, ], | |
| shape = 4, | |
| color = "red", | |
| show.legend = FALSE) + | |
| theme_minimal() | |
| # Needs to be refined further: it should cut out points where the step | |
| # length/speed both before AND after is too high. | |
| elk <- elk |> | |
| dplyr::mutate(iso_pass = !((dplyr::lag(isoscore) < 7) & (isoscore < 7))) | |
| ggplot() + | |
| geom_sf(data = elk_linestrings, | |
| alpha = 0.1) + | |
| geom_sf(data = elk[elk$iso_pass == TRUE, ], | |
| aes(color = animal_id), | |
| shape = 20, | |
| alpha = 0.3, | |
| show.legend = FALSE) + | |
| geom_sf(data = elk[elk$iso_pass == FALSE, ], | |
| shape = 4, | |
| color = "red", | |
| show.legend = FALSE) + | |
| theme_minimal() |
Author
popovs
commented
May 23, 2025

Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment