Last active
May 29, 2023 19:47
-
-
Save popovs/92fbdb44c22e565b92808b75da8de848 to your computer and use it in GitHub Desktop.
A tutorial in R on making beautiful animal movement maps, complete color gradient lines to show the passage of time.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## BEAUTIFUL GULL MAPS! ## | |
# @srharacha on Twitter: https://twitter.com/srharacha/status/1662648523597418501?s=20 | |
# Step 1: download the data | |
# We're going to download study number 986040562, our | |
# herring gull data, using the `move` package. You'll | |
# need a Movebank username and password to download it. | |
library(move) | |
# We only want to download the track for one bird - the | |
# dataset is far too large to download everything. | |
# Let's first download all the bird metadata for this study. | |
birds <- getMovebank("individual", study_id = 986040562) | |
# We want to get the `id` for one particular bird with an | |
# interesting track: H903604. | |
id <- birds[["id"]][birds$local_identifier == "H903604"] | |
rm(birds) | |
# Now download the tracks for bird H903605. You will have | |
# to enter your login credentials again. This will take a | |
# minute or two. | |
tracks <- getMovebank("event", | |
study_id = 986040562, | |
individual_id = id) | |
# Quickly visualize it to make sure it roughly matches | |
# the map we see online! | |
library(ggplot2) | |
ggplot(tracks, | |
aes(x = location_long, | |
y = location_lat)) + | |
geom_point() | |
# Step 2: Trim down the dataset | |
# We need to first remove NA latitude/longitudes. Conveniently, | |
# this also thins out the large dataset considerably. | |
tracks <- tracks[!is.na(tracks$location_long), ] # remove NA lat/long records | |
tracks <- tracks[!is.na(tracks$location_lat), ] | |
# For simple visualization, this is still a ridiculously large | |
# number of data points. This step is optional, but | |
# I prefer to thin out my datasets to a manageable number | |
# of points. Let's aggregate to one point per day, and, for | |
# the sake of this example, cut it down to just 2 years of data. | |
# The code will run MUCH faster with ~500 rows, and this is meant | |
# to be a quick example after all! | |
tracks$timestamp <- lubridate::as_datetime(tracks$timestamp, tz = "CET") | |
tracks$date <- as.character(as.Date(tracks$timestamp)) | |
tracks <- tracks[!is.na(tracks$timestamp), ] # drop any timestamps that failed to parse | |
# Calculate mean lat/long by date | |
# Also, we want to arrange the dataset by increasing date - | |
# we want to make sure the points are in the correct order | |
# before connecting the dots to make a linestring. | |
library(dplyr) | |
t <- tracks %>% | |
group_by(date) %>% | |
summarise(lat = mean(location_lat), | |
lon = mean(location_long)) %>% | |
mutate(date = as.POSIXct(date)) %>% | |
filter(date < "2016-01-01") %>% | |
arrange(date) | |
# Step 3: Convert points to linestring | |
# Create sf object | |
library(sf) | |
t <- st_as_sf(t, | |
coords = c('lon', 'lat'), | |
crs = "+proj=longlat") | |
t <- st_transform(t, crs = 4326) | |
# Check it out | |
ggplot(data = t) + geom_sf() | |
# Now we can finally turn these thinned points | |
# into a linestring object! | |
l <- t %>% | |
dplyr::arrange(date) %>% | |
dplyr::summarize(do_union = FALSE, # this is KEY right here - we do not want to perform a GIS union operation. | |
.groups = 'drop') %>% | |
sf::st_cast("LINESTRING") | |
# Check it out | |
ggplot(data = l) + geom_sf() | |
# Step 4: Smooth out the line | |
# Now we have a great linestring, but it's pretty | |
# ugly with all those pointy corners. Let's smooth | |
# that out with the `smoothr` package. I just use the | |
# default 'chaikin' method, but there are many options | |
# available. | |
s <- smoothr::smooth(l, method = "chaikin") | |
# There are still some pointy angles, but it looks much | |
# nicer than the original data! | |
ggplot(data = s) + geom_sf() | |
# Step 5: Merge smoothed data with the original data | |
# Unfortunately here we hit a major snag: the smoothed | |
# data don't contain any timestamps... we need to merge | |
# this back to the original dataset to attach a timestamp | |
# to the smoothed data. | |
# The solution has two parts: | |
# 1) Extract the points from s and merge to t by the nearest | |
# point. | |
# 2) Keep track of the order the smoothed data is in so that | |
# we can unscramble up any wonky timestamps after the merge. | |
# The reason will become clear later. | |
s_pts <- st_cast(s, "POINT") | |
s_pts$seq <- seq(nrow(s_pts)) | |
head(s_pts) | |
# Merge smoothed data to original data by nearest point. | |
# We'll call this "out", aka our eventual output. | |
out <- st_join(s_pts, t, join = st_nearest_feature) | |
# Note you can immediately see here that the timestamps | |
# are all totally messed up. This is because sometimes | |
# points with completely different timestamps are geo- | |
# graphically close enough they can still be merged. This | |
# is why the 'seq' column is so important! And brings us to | |
# the next step... | |
head(out) | |
# Step 6: Reorder timestamps to the correct order | |
# As you can see the smoothed data and the original | |
# data are merged, but the timestamps are absolutely | |
# out of whack. Compare the two visually and it's immediately | |
# clear: | |
p1 <- ggplot(data = t, aes(color = date)) + geom_sf() + labs(color = "Date") | |
p2 <- ggplot(data = out, aes(color = date)) + geom_sf() + labs(color = "Date") | |
ggpubr::ggarrange(p1, p2, nrow = 1, ncol = 2, common.legend = TRUE) | |
# The key thing here is we know two timestamps with 100% | |
# accuracy: the first and last. So let's set those manually. | |
# In this specific example they are correct but there are | |
# sometimes cases where they aren't so this is an important | |
# step! | |
out[["date"]][1] <- min(t$date) | |
out[["date"]][nrow(out)] <- max(t$date) | |
# Next, create the difftime column - time difference between | |
# row n and n+1. Any negative difftimes = bad! Animal can't | |
# be going backwards in time. | |
out$difftime <- difftime(out$date, dplyr::lag(out$date)) | |
# And also pull median time difference from original dataset | |
# Any difftime > than that will be flagged (in addition to | |
# negative difftimes). Basically, if the interpolated timestamps | |
# jump over time gaps that are greater than what the tag itself | |
# transmits, that's probably an interpolation error. The median | |
# timestamp lag in the original dataset will therefore serve as our | |
# upper limit or "max time difference" that we'll allow in our | |
# interpolations. | |
max_diff <- median(difftime(t$date, dplyr::lag(t$date)), na.rm = T) | |
# Now we are going to use this difftime and max_diff | |
# columns to figure out which timestamps are out of order | |
# or ok. The logic behind this is that if the difference between | |
# the current timestamp and the one right before it is >= 0, | |
# then it's probably not out of order. If it's *negative*, | |
# however, then it for sure is wrong. So, we'll create the | |
# 't_interp' column to keep track of this. I'm using the | |
# data.table 'fcase' function for this since it's lightning | |
# fast compared to vanilla base ifelse or dplyr solutions - ideal | |
# for those huge telemetry datasets out there. | |
# The if/else logic is simple: | |
# - If difftime > max_diff, the result should be NA* | |
# - If difftime >= 0, use the date in the date column | |
# - If difftime is NA, use the date in the date column | |
# *NOTE: we wrap NA with as.POSIXct so that it matches the same | |
# data type as the 'date' column. Otherwise it fails. | |
data.table::setDT(out) | |
out[ , t_interp := data.table::fcase( | |
difftime > max_diff, as.POSIXct(NA, tz = "CET"), | |
difftime >= 0, date, | |
is.na(difftime), date | |
)] | |
# Now we reset the first and last timestamps in case the ifelse | |
# and interpolate over the gaps in t_interp using the `zoo` | |
# package. | |
out$t_interp[1] <- min(t$date) | |
out$t_interp[nrow(out)] <- max(t$date) | |
out$t_interp <- zoo::na.approx(out$t_interp) %>% | |
as.POSIXct(origin = "1970-01-01", tz = "CET") | |
# Let's compare now... | |
p3 <- ggplot(data = st_as_sf(out), aes(color = t_interp)) + geom_sf() + labs(color = "Date") | |
ggpubr::ggarrange(p2, p3, nrow = 1, ncol = 2, common.legend = TRUE) | |
# With just one round of interpolation, it's changed slightly - | |
# Notice the top two east-west tracks have smoother color gradients, | |
# indicating reordered timestamps. The t_interp column already makes | |
# more sense than the original 'date' column. However, it's clear the | |
# dataset still needs some work. | |
head(out) | |
# The solution: re-iterate the difftime calculation and | |
# date interpolation until all difftime is < max_diff and | |
# there are no more negative difftimes. Time for a 'while' loop! | |
while(max(out$difftime, na.rm = T) > max_diff | min(out$difftime, na.rm = T) < 0) { | |
out$difftime <- difftime(out$t_interp, dplyr::lag(out$t_interp)) | |
out[ , t_interp := data.table::fcase( | |
difftime > max_diff, as.POSIXct(NA, tz = 'CET'), | |
difftime >= 0, t_interp, | |
is.na(difftime), t_interp | |
)] | |
# Add first & last timestamp and interpolate once again... | |
out$t_interp[1] <- min(t$date) | |
out$t_interp[nrow(out)] <- max(t$date) | |
out$t_interp <- zoo::na.approx(out$t_interp) %>% | |
as.POSIXct(origin = '1970-01-01', tz = 'CET') | |
} # Close while | |
# And like magic it's all cleaned up. The color gradient | |
# on our points is smooth, indicating no weird time jumps. | |
p4 <- ggplot(data = st_as_sf(out), aes(color = t_interp)) + geom_sf() + labs(color = "Date") | |
ggpubr::ggarrange(p1, p2, p3, p4, nrow = 2, ncol = 2, common.legend = TRUE) | |
# Step 7: Final cleanup | |
# At the time of this writing, converting an R data object to | |
# data.table removes the sf class. So let's reset that. | |
out <- st_as_sf(out) | |
# Step 8: Map it! | |
# Final step is to plot it up all pretty. First, let's download | |
# a land polygon from the rnaturalearth package. | |
world <- rnaturalearth::ne_countries(scale = 10, # high res, large scale | |
country = c("United Kingdom", | |
"Netherlands", | |
"Belgium", | |
"France"), | |
returnclass = "sf") | |
# Set the CRS | |
# Let's set the CRS to the local UTM zone so the tracks aren't | |
# distorted - UTM 31N | |
out <- st_transform(out, crs = 25831) | |
world <- st_transform(world, crs = 25831) | |
# Set the bounding box for the plot | |
bbox <- st_bbox(out) | |
# Extract lat & long for our plot | |
out$lat <- st_coordinates(out)[,2] | |
out$lon <- st_coordinates(out)[,1] | |
# Now plot it! We'll use viridis to color the line. | |
library(viridis) | |
map <- ggplot() + | |
geom_sf(data = world, | |
lwd = 0.1, | |
color = "#C3C3C3", | |
fill = "#CBCACA") + | |
geom_path(data = out, | |
aes(x = lon, | |
y = lat, | |
color = t_interp), | |
size = 0.8, | |
lineend = "round") + | |
scale_color_viridis() + | |
coord_sf(xlim = bbox[c(1,3)], | |
ylim = bbox[c(2,4)], | |
expand = T) + | |
theme_minimal() | |
# This could certainly look prettier, with a nicer scale bar. | |
# Let's pull out pretty looking dates from the data and do some | |
# scalebar magic to change the labels to look nicer. | |
scale_dates <- pretty(out$t_interp) | |
map <- map + | |
scale_color_viridis_c(breaks = as.numeric(scale_dates), | |
labels = format(scale_dates, format = "%b '%y"), | |
limits = c(min(as.numeric(scale_dates)), max(as.numeric(scale_dates))), | |
guide = guide_colorbar(direction = "horizontal", | |
title.position = "top", | |
label.position = "bottom")) | |
# Much better - now all that's needed is some aesthetic tidying up. | |
# As a fair warning, this stuff can take FOREVER and is typically | |
# what I spend the most time on.... | |
map <- map + theme(text = element_text(family = "Karla", color = "#2D2D2E"), # change the font | |
legend.position = c(0.2,0.08), # Move the legend to the bottom left corner | |
legend.margin = margin(t = 0, unit = "cm"), # Set top of legend margin to zero | |
legend.title = element_blank(), # Remove legend title | |
legend.text = element_text(size = 8, color = "#878585"), # Adjust legend text size | |
legend.key = elementalist::element_rect_round(radius = unit(0.5, "snpc")), # Make the scalebar have rounded edges (thanks to elementalist pkg) | |
legend.key.width = unit(1, "cm"), # Change legend colorbar width | |
legend.key.height = unit(0.1, "cm"), # Change legend colorbar height | |
legend.background = element_blank(), # Remove legend background | |
axis.title = element_blank(), # Remove axis titles | |
panel.grid = element_line(color = "#ebebe5", size = 0.2), # Change lat/long graticule line colors | |
panel.background = element_rect(fill = "#f5f5f2", color = NA)) # Change plot panel background color | |
# Maybe add a caption to top it all off. | |
map <- map + | |
annotate(geom = "text", | |
x = 326000, | |
y = 5600000, | |
label = "Herring Gull H903604", | |
family = "Karla", | |
color = "#2D2D2E", | |
size = 5, | |
hjust = 0) + | |
annotate(geom = "text", | |
x = 326000, | |
y = 5593000, | |
label = "Gull H903604 makes a trip to the UK.", | |
family = "Karla", | |
color = "#2D2D2E", | |
size = 3, | |
hjust = 0) | |
# Step 9: BONUS! Bundle it all up into a neat function. | |
# Which you can see on my gitlab here :) | |
# https://gitlab.com/popovs/srha/-/blob/main/R/movement.R |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment