Skip to content

Instantly share code, notes, and snippets.

@popovs
Created May 23, 2025 00:29
Show Gist options
  • Select an option

  • Save popovs/6536a926b5bcbeb0327d9d7f363f7c9c to your computer and use it in GitHub Desktop.

Select an option

Save popovs/6536a926b5bcbeb0327d9d7f363f7c9c to your computer and use it in GitHub Desktop.
Identifying spikes in animal tracks using the isolation forest algorithm
tar_load(elk)
elk <- elk[elk$animal_id == "18-11034",]
elk <- elk[elk$month == 10, ]
elk <- elk[elk$year == 2020, ]
plot(elk)
hist(elk$step_length_m)
plot(density(elk$step_length_m))
plot(density(elk$spd_kph))
elk$spd_kph
plot(density(elk$NSD))
# Try isotree to flag outlier in terms of step length and/or spd?
isodata <- as.data.frame(elk[["step_length_m"]])
names(isodata) <- "step_length_m"
if_model <- isotree::isolation.forest(isodata)
scores <- isotree::predict.isolation_forest(if_model, isodata, type = "avg_depth")
hist(scores)
plot(isodata$step_length_m, scores)
elk$iso_score <- scores
elk$iso_cutoff <- elk$iso_score < 4
plot(elk["iso_score"])
plot(elk["iso_cutoff"])
plot(elk[elk$iso_cutoff == TRUE, "iso_cutoff"])
plot(elk$dttm, elk$iso_score)
plot(elk$dttm, elk$iso_cutoff)
plot(elk["dttm"])
plot(elk$dttm, elk$step_length_m)
# Now try the same thing but with spd_kph. Since speed and
# step length are derived from the same thing it should be itentical
isodata <- sf::st_drop_geometry(elk[,c("spd_kph")])
if_model <- isotree::isolation.forest(isodata)
scores <- isotree::predict.isolation_forest(if_model, isodata, type = "avg_depth")
hist(scores)
plot(isodata$spd_kph, scores)
elk$iso_score <- scores
elk$iso_cutoff <- elk$iso_score < 4
plot(elk["iso_score"])
plot(elk["iso_cutoff"])
plot(elk[elk$iso_cutoff == TRUE, "iso_cutoff"])
plot(elk$dttm, elk$iso_score)
plot(elk$dttm, elk$iso_cutoff)
plot(elk$dttm, elk$spd_kph)
## Now try on entire dataset
tar_load(elk)
isodata <- sf::st_drop_geometry(elk[,c("animal_id", "step_length_m")])
if_model <- isotree::isolation.forest(isodata, ndim = 2)
scores <- isotree::predict.isolation_forest(if_model, isodata, type = "avg_depth")
hist(scores)
plot(isodata$step_length_m, scores)
plot(log(isodata$step_length_m), scores)
quantile(scores) # looks like 4 is actually a great cutoff
# Add it to the elk dataset
elk$isoscore <- scores
elk$iso_pass <- elk$isoscore > 4 # use 4 as the cutoff
library(ggplot2)
ggplot() +
geom_sf(data = elk,
aes(color = iso_pass,
shape = iso_pass))
ggplot() +
geom_sf(data = elk,
aes(color = isoscore))
# What about quantile by animal_id?
x <- elk |>
sf::st_drop_geometry() |>
dplyr::select(animal_id, isoscore) |>
dplyr::group_by(animal_id) |>
dplyr::summarise(tibble::as_tibble_row(quantile(isoscore)))
names(x) <- c("animal_id", "q0", "q25", "q50", "q75", "q100")
hist(x$q0)
plot(density(x$q0))
median(x$q0)
elk$iso_pass <- elk$isoscore > 7 # use the median q0 as the isopass score?
ggplot() +
geom_sf(data = elk,
aes(color = iso_pass,
shape = iso_pass))
elk_linestrings <- elk |>
dplyr::group_by(animal_id) |>
dplyr::arrange(dttm) |>
dplyr::summarize(do_union = FALSE,
.groups = 'drop') |>
sf::st_cast("LINESTRING")
ggplot() +
geom_sf(data = elk_linestrings,
alpha = 0.1) +
geom_sf(data = elk[elk$iso_pass == TRUE, ],
aes(color = animal_id),
shape = 20,
alpha = 0.3,
show.legend = FALSE) +
geom_sf(data = elk[elk$iso_pass == FALSE, ],
shape = 4,
color = "red",
show.legend = FALSE) +
theme_minimal()
# Needs to be refined further: it should cut out points where the step
# length/speed both before AND after is too high.
elk <- elk |>
dplyr::mutate(iso_pass = !((dplyr::lag(isoscore) < 7) & (isoscore < 7)))
ggplot() +
geom_sf(data = elk_linestrings,
alpha = 0.1) +
geom_sf(data = elk[elk$iso_pass == TRUE, ],
aes(color = animal_id),
shape = 20,
alpha = 0.3,
show.legend = FALSE) +
geom_sf(data = elk[elk$iso_pass == FALSE, ],
shape = 4,
color = "red",
show.legend = FALSE) +
theme_minimal()
@popovs
Copy link
Copy Markdown
Author

popovs commented May 23, 2025

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment