# Minimal example of a Bayesian finite sample approach for wildfires using brms
# @mbjoseph, created February 25, 2021

library(tidyverse)
library(sf)
library(here)
library(brms)
library(lubridate)
library(reshape2)
# Get ecoregion data ------------------------------------------------------
download.file("ftp://newftp.epa.gov/EPADataCommons/ORD/Ecoregions/us/us_eco_l4.zip",
destfile = "ecoregions.zip")
unzip("ecoregions.zip")
ecoregions <- st_read("us_eco_l4_no_st.shp") %>%
st_make_valid() %>%
# summarize to level 3 regions (assuming we don't want to go down to level 4)
group_by(L3_KEY, L2_KEY, L1_KEY) %>%
summarize()
ecoregions <- ecoregions %>%
  ungroup() %>%
  mutate(eco_area_sq_m = st_area(ecoregions),
         eco_area_sq_km = as.numeric(eco_area_sq_m) / 1e6)
# quick map to check that the ecoregion polygons look reasonable
ecoregions %>%
  ggplot() +
  geom_sf()
# Get MTBS data -----------------------------------------------------------
download.file("https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/MTBS_Fire/data/composite_data/fod_pt_shapefile/mtbs_fod_pts_data.zip",
destfile = "mtbs.zip")
unzip("mtbs.zip")
mtbs <- st_read("mtbs_FODpoints_DD.shp") %>%
st_transform(st_crs(ecoregions)) %>%
filter(Incid_Type == "Wildfire",
BurnBndAc > 1e3) %>%
mutate(year = year(Ig_Date),
month = month(Ig_Date))
mtbs %>%
ggplot() +
geom_sf()
# Find the ecoregions for each MTBS event ---------------------------------
mtbs_eco <- st_intersection(mtbs, ecoregions)
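# Note (an alternative, not in the original gist): for point-in-polygon
# overlays like this one, sf::st_join() with left = FALSE gives the same
# inner-join behavior and is often faster:
# mtbs_eco <- st_join(mtbs, ecoregions, left = FALSE)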
# sanity check: map the Great Plains events, colored by level 2 ecoregion
mtbs_eco %>%
  filter(grepl("GREAT PLAINS", L1_KEY)) %>%
  ggplot() +
  geom_sf(aes(color = L2_KEY))
# Generate clean count and size data sets ---------------------------------
nonzero_counts <- mtbs_eco %>%
  as_tibble() %>%
  count(year, month, L3_KEY)

ecoregion_keys <- ecoregions %>%
  as_tibble() %>%
  select(ends_with("KEY"), eco_area_sq_km)
# start with a small subset of the data to reduce model run times
focal_l1_region <- "GREAT PLAINS"
max_year_train_set <- 2010
all_counts <- expand.grid(year = 1984:2018,
                          month = 1:12,
                          # assumes L3_KEY was read as a factor; if it is
                          # character, use unique(ecoregions$L3_KEY) instead
                          L3_KEY = levels(ecoregions$L3_KEY)) %>%
  as_tibble() %>%
  filter(!(year == 1984 & month == 1)) %>% # Jan 1984 is not in MTBS
  left_join(nonzero_counts) %>%
  mutate(n = ifelse(is.na(n), 0, n)) %>% # zero-fill months with no fires
  left_join(ecoregion_keys)
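# Quick sanity check (not in the original gist): the zero-filled panel should
# contain exactly one row per year-month-ecoregion combination
stopifnot(!any(duplicated(all_counts[, c("year", "month", "L3_KEY")])))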
train_counts <- all_counts %>%
  filter(
    grepl(focal_l1_region, L1_KEY), # start by focusing on one ecoregion
    year < max_year_train_set       # start with a subset of years
  )

train_sizes <- mtbs_eco %>%
  as_tibble() %>%
  filter(
    grepl(focal_l1_region, L1_KEY),
    year < max_year_train_set
  )

# withheld events: everything in the focal region that was not used to train
test_sizes <- mtbs_eco %>%
  as_tibble() %>%
  filter(grepl(focal_l1_region, L1_KEY)) %>%
  anti_join(train_sizes)
# Fit a count model -------------------------------------------------------
# s(month, bs = "cc") is a cyclic spline smoother (essentially an easy way to
# account for seasonality, though in a real model it would be better to
# include climate data instead); the log-area offset makes expected counts
# proportional to ecoregion area.
count_model <- brm(n ~ s(month, bs = "cc") + (1 | L2_KEY) + (1 | L3_KEY) +
                     offset(log(eco_area_sq_km)),
                   data = train_counts,
                   family = zero_inflated_negbinomial(),
                   cores = 4)
summary(count_model)
plot(conditional_effects(count_model), ask = FALSE)
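# Optional (not in the original gist): a quick posterior predictive check on
# the count model via brms' pp_check(); type = "bars" suits discrete counts.
pp_check(count_model, type = "bars")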
# Fit a fire size model ---------------------------------------------------
size_model <- brm(BurnBndAc ~ s(month, bs = "cc") + (1 | L2_KEY) + (1 | L3_KEY),
                  data = train_sizes,
                  family = lognormal(),
                  cores = 4)
summary(size_model)
plot(conditional_effects(size_model), ask = FALSE)
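# As above (not in the original gist), a posterior predictive check for the
# size model; the default density overlay suits a continuous outcome.
pp_check(size_model)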
# Combine outputs from count and size model to infer total burn area ------
test_counts <- anti_join(all_counts, train_counts) %>%
  filter(L3_KEY %in% train_counts$L3_KEY) %>%
  mutate(test_row_index = 1:n())
# generate a set of count predictions for each row in the test data
count_predictions <- predict(count_model,
                             summary = FALSE,
                             newdata = test_counts,
                             allow_new_levels = TRUE) %>%
  reshape2::melt(varnames = c("iter", "test_row_index")) %>%
  as_tibble() %>%
  filter(value > 0) # we only need to predict sizes for predicted events
## For each iteration, simulate total burned area values -------------------
# (note that this could be parallelized to speed things up, but a for loop
# may be easier to debug; a parallel sketch follows the loop)
max_iter <- 1000
size_predictions <- list()
pb <- txtProgressBar(max = max_iter, style = 3)
for (i in 1:max_iter) {
  size_pred_df <- count_predictions %>%
    filter(iter == i) %>%
    left_join(test_counts, by = "test_row_index") %>%
    # expand to one row per predicted event, so that totals sum over all
    # events in a month (value holds the predicted count for that row)
    uncount(value)
  # simulate size values
  size_matrix <- predict(size_model,
                         newdata = size_pred_df,
                         summary = FALSE,
                         allow_new_levels = TRUE)
  # choose one posterior draw at random
  random_draw <- sample(1:nrow(size_matrix), size = 1)
  # compute a prediction for total burned area
  size_pred_df$predicted_size <- size_matrix[random_draw, ]
  size_predictions[[i]] <- size_pred_df %>%
    select(iter, test_row_index, predicted_size)
  setTxtProgressBar(pb, i)
}
close(pb)
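# A sketch of the parallel alternative mentioned above (not in the original
# gist), using parallel::mclapply; this assumes a Unix-alike OS, since
# mc.cores > 1 is unsupported on Windows. The serial loop is the reference.
# size_predictions <- parallel::mclapply(1:max_iter, function(i) {
#   size_pred_df <- count_predictions %>%
#     filter(iter == i) %>%
#     left_join(test_counts, by = "test_row_index") %>%
#     uncount(value)
#   size_matrix <- predict(size_model, newdata = size_pred_df,
#                          summary = FALSE, allow_new_levels = TRUE)
#   size_pred_df$predicted_size <- size_matrix[sample(nrow(size_matrix), 1), ]
#   size_pred_df %>% select(iter, test_row_index, predicted_size)
# }, mc.cores = 4)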
# Generate a clean dataset of predictions ---------------------------------
posterior_preds <- bind_rows(size_predictions) %>%
  left_join(test_counts) %>%
  group_by(iter, L3_KEY, year, month) %>%
  summarize(n_events_pred = length(predicted_size),
            total_burned_area_pred = sum(predicted_size),
            maximum_fire_size_pred = max(predicted_size)) %>%
  ungroup() %>%
  # fill implicit zeros (occurs when no events are predicted)
  complete(iter, L3_KEY, year, month,
           fill = list(n_events_pred = 0,
                       total_burned_area_pred = 0,
                       maximum_fire_size_pred = 0)) %>%
  filter(!(year == 1984 & month == 1), # Jan 1984 is not in MTBS
         L3_KEY %in% train_counts$L3_KEY) %>%
  mutate(first_day_of_month = as.Date(paste(year,
                                            sprintf("%02d", month),
                                            "01",
                                            sep = "-")))
# Visualize predictions ---------------------------------------------------
# show one line per posterior iteration
posterior_preds %>%
  ggplot(aes(first_day_of_month, total_burned_area_pred)) +
  geom_line(aes(group = iter), alpha = .2) +
  facet_wrap(~L3_KEY, ncol = 2) +
  xlab("Date") +
  ylab("Total burned area (acres)")
# show posterior median & 95% credible interval + actual withheld data
test_summary <- test_sizes %>%
  mutate(first_day_of_month = as.Date(paste(year,
                                            sprintf("%02d", month),
                                            "01",
                                            sep = "-"))) %>%
  group_by(L3_KEY, first_day_of_month) %>%
  summarize(total_burned_area = sum(BurnBndAc)) %>%
  ungroup()
# predictions shown as ribbons, withheld data shown as points
posterior_preds %>%
  group_by(first_day_of_month, L3_KEY) %>%
  mutate(med = median(total_burned_area_pred),
         lo = quantile(total_burned_area_pred, .025),
         hi = quantile(total_burned_area_pred, .975)) %>%
  ggplot(aes(first_day_of_month)) +
  geom_ribbon(aes(ymin = lo, ymax = hi),
              color = NA, alpha = .4, fill = "darkred") +
  geom_line(aes(y = med)) +
  facet_wrap(~L3_KEY, ncol = 2) +
  xlab("Date") +
  ylab("Total burned area (acres)") +
  geom_point(data = test_summary, aes(y = total_burned_area)) +
  scale_y_log10()
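# Optional (not in the original gist): a rough empirical coverage check for
# months with at least one withheld event -- the fraction of observed totals
# falling inside the 95% interval. Months with zero withheld events are
# absent from test_summary, so this is only a partial check.
posterior_preds %>%
  group_by(first_day_of_month, L3_KEY) %>%
  summarize(lo = quantile(total_burned_area_pred, .025),
            hi = quantile(total_burned_area_pred, .975),
            .groups = "drop") %>%
  inner_join(test_summary, by = c("first_day_of_month", "L3_KEY")) %>%
  summarize(coverage = mean(total_burned_area >= lo & total_burned_area <= hi))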