Skip to content

Instantly share code, notes, and snippets.

@grosscol
Last active October 20, 2021 22:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save grosscol/bc40383a3de09987548757208d4d75fb to your computer and use it in GitHub Desktop.
Save grosscol/bc40383a3de09987548757208d4d75fb to your computer and use it in GitHub Desktop.
Refactor nested for loop into smaller testable functions
### Refactored Birds in Trees code
library(dplyr)
library(mefa4)
# Function to create list of list of args for outer calculation function
# returns List of list(stand_id, species_list, stand_data)
make_stand_args <- function(stand_id, data){
splist <- data %>%
filter(StandID == stand_id) %>%
dplyr::select(Species) %>%
distinct()%>%
pull(Species)
result <- list(stand_id = stand_id,
species_list = splist)
return(result)
}
# Function to do the inner calculation
# calc the mden of single species,
# given duration and distance intervals of all species, and tau lookup table
calc_mden <- function(species_id, dur_intervals, dis_intervals, tau_ref){
# duration variable phi for species
y_dur <- as.matrix(dur_intervals[[species_id]])
d_dur <- matrix(c(3, 5, 10), nrow(y_dur), 3, byrow=TRUE,
dimnames=dimnames(y_dur))
# distance variable tau
y_dis <- as.matrix(dis_intervals[[species_id]])
d_dis <- matrix(c(0.5, 1), nrow(y_dis), 2, byrow=TRUE,
dimnames=dimnames(y_dis))
# grab tau for specific species
tau <- tau_ref[which(tau_ref$Sp == species_id),3]
rmax <- apply(d_dis, 1, max)
q <- (tau^2 / rmax^2) * (1-exp(-(rmax/tau)^2))
A <- pi * rmax^2
a_q <- A * q
y_total <- rowSums(y_dur)
m_den <- mean(y_total/a_q$tau_best)
return(m_den)
}
# Outer calculation function
# return a list of mdens for each stand
# Each list of mdens would have one entry for each species.
calc_stand_mdens <- function(stand_id, species_list, tau_ref, all_dis, all_dur){
names(species_list) <- species_list
species_mdens <- lapply(X=species_list, FUN=calc_mden,
dur_intervals=all_dur, dis_intervals=all_dis,
tau_ref=tau_ref)
return(species_mdens)
}
# Wrap do.call because I hate trying to stuff it into lapply directly.
# Combine dynamic and static args into single arguments list
splat_wrapper <- function(dynamic_args, static_args){
args <- c(dynamic_args, static_args)
do.call(what=calc_stand_mdens, args=args)
}
# Input Parameters
raw_data <- source('~/Downloads/SampleBirdData.txt')$value
tau_ref <- source('~/Downloads/tau_ref.txt')$value
stand_list <- source('~/Downloads/sample_standlist.txt')$value
# Correct column is a list of lists issue in source data
qpadbirdData <- raw_data
qpadbirdData$Species <- unlist(raw_data$Species, use.names=F)
# Calculate the crosstabs for distance and duration for all species
# time Intervals
all_dur <- mefa4::Xtab(~ VisitID + Dur + Species, qpadbirdData)
# distance intervals
all_dis <- mefa4::Xtab(~ VisitID + Dis + Species, qpadbirdData)
# Create static args list of the tau_refs and crosstabs
static_args <- list(tau_ref=tau_ref,
all_dur=all_dur,
all_dis=all_dis)
# Given a list of stands and the source data,
# make a list of arguments lists for the calculation function
dynamic_args <- lapply(X = stand_list,
FUN = make_stand_args,
data = qpadbirdData)
# Now run the foo function once for each element (set of args) from stand_args
names(stand_args) <- stand_list
stand_calcs <- lapply(X = dynamic_args,
FUN = splat_wrapper,
static_args = static_args)
c("BEF 15", "NUL 10", "BEF 16", "NUL 2", "BEF 17", "NUL 8", "BEF 14",
"NUL 14", "NUL 1", "NUL 13", "NUL 15", "NUL 6", "BEF 11", "BEF 3",
"NUL 3", "BEF 2", "BEF 8", "NUL 4", "BEF 9", "BEF 7", "NUL 9",
"NUL 7", "NUL 5", "BEF 1", "NUL 12", "NUL 11", "BEF 6", "BEF 12"
)
structure(list(StandID = c("BEF 15", "NUL 10", "BEF 16", "NUL 2",
"BEF 17", "NUL 8", "BEF 17", "BEF 17", "BEF 14", "NUL 14", "NUL 1",
"NUL 13", "NUL 15", "NUL 6", "NUL 13", "BEF 11", "BEF 14", "NUL 15",
"BEF 16", "BEF 3", "NUL 14", "BEF 3", "BEF 14", "NUL 3", "BEF 11",
"BEF 2", "NUL 2", "BEF 16", "BEF 8", "BEF 3", "BEF 8", "NUL 4",
"BEF 9", "BEF 17", "BEF 7", "NUL 14", "BEF 17", "NUL 10", "NUL 4",
"NUL 13", "NUL 9", "NUL 10", "BEF 3", "BEF 2", "BEF 3", "NUL 13",
"NUL 15", "NUL 6", "NUL 4", "NUL 7", "BEF 11", "NUL 10", "NUL 5",
"NUL 9", "BEF 1", "NUL 12", "NUL 5", "BEF 8", "NUL 7", "NUL 8",
"BEF 9", "BEF 1", "NUL 2", "NUL 8", "BEF 7", "BEF 8", "BEF 9",
"NUL 8", "BEF 8", "NUL 15", "NUL 5", "NUL 14", "BEF 9", "NUL 13",
"NUL 10", "NUL 13", "BEF 14", "NUL 8", "NUL 6", "NUL 10", "BEF 17",
"BEF 8", "NUL 11", "BEF 6", "BEF 16", "BEF 2", "NUL 7", "NUL 14",
"BEF 16", "NUL 5", "NUL 1", "BEF 15", "NUL 9", "BEF 12", "BEF 11",
"BEF 14", "NUL 12", "NUL 5", "NUL 14", "NUL 3"), Species = structure(list(
Species = c("NOPA", "HETH", "REVI", "RBNU", "BTNW", "MYWA",
"OVEN", "BTNW", "WIWR", "BLBW", "BTNW", "HETH", "SCTA", "YBSA",
"RBNU", "BLBW", "GCKI", "BTBW", "REVI", "REVI", "REVI", "REVI",
"RBNU", "REVI", "OVEN", "OVEN", "HETH", "REVI", "BLBW", "RCKI",
"BTBW", "OVEN", "VEER", "BTNW", "BCCH", "REVI", "WIWR", "BCCH",
"OVEN", "BLJA", "YBSA", "MYWA", "BTNW", "BCCH", "PAWA", "BTBW",
"NOFL", "REVI", "YBSA", "REVI", "CSWA", "SOSP", "BTNW", "REVI",
"BTBW", "BOCH", "YBSA", "CAWA", "BTNW", "HETH", "DEJU", "HETH",
"WTSP", "YBSA", "BTBW", "BLJA", "VEER", "REVI", "BTBW", "BTNW",
"REVI", "BTBW", "BTBW", "YBSA", "AMRE", "RUGR", "BBCU", "RBNU",
"BLBW", "REVI", "BTBW", "NAWA", "BTBW", "GCKI", "BCCH", "RBNU",
"HETH", "OVEN", "BTNW", "BLBW", "REVI", "REVI", "REVI", "ALFL",
"COYE", "HETH", "REVI", "SWTH", "OVEN", "REVI")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame")), Dur = c("5-10min",
"0-3min", "0-3min", "5-10min", "5-10min", "5-10min", "0-3min",
"5-10min", "0-3min", "3-5min", "5-10min", "0-3min", "0-3min",
"3-5min", "0-3min", "0-3min", "5-10min", "0-3min", "0-3min",
"5-10min", "3-5min", "5-10min", "0-3min", "0-3min", "0-3min",
"0-3min", "3-5min", "5-10min", "0-3min", "0-3min", "0-3min",
"0-3min", "0-3min", "0-3min", "0-3min", "0-3min", "0-3min", "3-5min",
"0-3min", "0-3min", "3-5min", "0-3min", "0-3min", "0-3min", "3-5min",
"0-3min", "5-10min", "0-3min", "3-5min", "0-3min", "0-3min",
"0-3min", "0-3min", "3-5min", "0-3min", "0-3min", "5-10min",
"0-3min", "0-3min", "0-3min", "0-3min", "0-3min", "0-3min", "5-10min",
"0-3min", "5-10min", "3-5min", "5-10min", "3-5min", "3-5min",
"5-10min", "0-3min", "0-3min", "0-3min", "0-3min", "0-3min",
"3-5min", "5-10min", "3-5min", "0-3min", "0-3min", "0-3min",
"0-3min", "0-3min", "0-3min", "0-3min", "0-3min", "0-3min", "5-10min",
"0-3min", "0-3min", "0-3min", "0-3min", "0-3min", "0-3min", "5-10min",
"0-3min", "0-3min", "0-3min", "0-3min"), Dis = c("0-50m", "50-100m",
"50-100m", "0-50m", "50-100m", "50-100m", "0-50m", "0-50m", "50-100m",
"50-100m", "0-50m", "50-100m", "0-50m", "50-100m", "50-100m",
"0-50m", "50-100m", "50-100m", "50-100m", "50-100m", "50-100m",
"50-100m", "0-50m", "0-50m", "0-50m", "50-100m", "50-100m", "0-50m",
"50-100m", "0-50m", "50-100m", "50-100m", "0-50m", "50-100m",
"50-100m", "50-100m", "50-100m", "50-100m", "50-100m", "50-100m",
"50-100m", "50-100m", "50-100m", "0-50m", "50-100m", "0-50m",
"0-50m", "50-100m", "50-100m", "0-50m", "50-100m", "50-100m",
"50-100m", "50-100m", "50-100m", "0-50m", "0-50m", "0-50m", "0-50m",
"0-50m", "0-50m", "50-100m", "50-100m", "0-50m", "0-50m", "50-100m",
"0-50m", "50-100m", "50-100m", "50-100m", "50-100m", "50-100m",
"0-50m", "0-50m", "0-50m", "50-100m", "0-50m", "0-50m", "50-100m",
"0-50m", "0-50m", "50-100m", "0-50m", "0-50m", "0-50m", "0-50m",
"0-50m", "50-100m", "0-50m", "50-100m", "0-50m", "50-100m", "50-100m",
"50-100m", "50-100m", "0-50m", "0-50m", "50-100m", "0-50m", "50-100m"
), VisitID = c("BEF 15 A 182", "NUL 10 A 154", "BEF 16 B 160",
"NUL 2 D 172", "BEF 17 D 158", "NUL 8 B 154", "BEF 17 B 182",
"BEF 17 C 202", "BEF 14 B 158", "NUL 14 B 193", "NUL 1 B 197",
"NUL 13 D 152", "NUL 15 B 150", "NUL 6 D 173", "NUL 13 A 152",
"BEF 11 A 159", "BEF 14 A 182", "NUL 15 B 150", "BEF 16 C 160",
"BEF 3 A 179", "NUL 14 F 152", "BEF 3 D 179", "BEF 14 B 182",
"NUL 3 D 197", "BEF 11 E 159", "BEF 2 E 162", "NUL 2 C 172",
"BEF 16 C 180", "BEF 8 A 180", "BEF 3 B 179", "BEF 8 A 180",
"NUL 4 A 196", "BEF 9 C 160", "BEF 17 F 158", "BEF 7 E 180",
"NUL 14 D 176", "BEF 17 F 182", "NUL 10 D 194", "NUL 4 A 196",
"NUL 13 E 193", "NUL 9 C 195", "NUL 10 F 194", "BEF 3 F 179",
"BEF 2 E 162", "BEF 3 C 162", "NUL 13 C 193", "NUL 15 D 150",
"NUL 6 C 173", "NUL 4 C 196", "NUL 7 A 154", "BEF 11 C 159",
"NUL 10 D 154", "NUL 5 B 196", "NUL 9 A 154", "BEF 1 D 202",
"NUL 12 B 153", "NUL 5 D 196", "BEF 8 F 180", "NUL 7 B 154",
"NUL 8 C 154", "BEF 9 C 204", "BEF 1 E 179", "NUL 2 A 155", "NUL 8 D 174",
"BEF 7 D 180", "BEF 8 E 180", "BEF 9 A 180", "NUL 8 A 195", "BEF 8 C 160",
"NUL 15 E 150", "NUL 5 B 155", "NUL 14 B 176", "BEF 9 A 180",
"NUL 13 D 193", "NUL 10 B 175", "NUL 13 F 176", "BEF 14 A 158",
"NUL 8 C 154", "NUL 6 C 155", "NUL 10 E 194", "BEF 17 D 202",
"BEF 8 F 180", "NUL 11 F 194", "BEF 6 A 178", "BEF 16 C 160",
"BEF 2 B 162", "NUL 7 B 174", "NUL 14 D 193", "BEF 16 D 180",
"NUL 5 B 196", "NUL 1 D 172", "BEF 15 C 158", "NUL 9 D 154",
"BEF 12 F 159", "BEF 11 D 159", "BEF 14 B 201", "NUL 12 E 153",
"NUL 5 A 155", "NUL 14 C 193", "NUL 3 E 172")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
structure(list(Sp = c("RTHU", "BLBW", "GCKI", "BOCH", "BTBW",
"BRCR", "MAWA", "CMWA", "BAWW", "AMRE", "BHVI", "NOPA", "CSWA",
"CAWA", "BTNW", "MOWA", "WIWR", "REVI", "YBFL", "RBGR", "LEFL",
"PAWA", "BLPW", "DOWO", "YBSA", "OVEN", "DEJU", "CEDW", "SCTA",
"VEER", "SOSP", "MYWA", "EAWP", "BCCH", "WBNU", "SWSP", "INBU",
"BEKI", "NAWA", "HAWO", "ALFL", "GCFL", "AMGO", "WTSP", "SWTH",
"LISP", "PUFI", "RCKI", "AMRO", "BLJA", "RBNU", "COYE", "RUGR",
"NOWA", "HETH", "BBCU", "NOFL", "PIWO"), tau = c(0.298999, 0.470045,
0.388122, 0.474593, 0.452795, 0.464176, 0.607747, 0.5223915,
0.615874, 0.6017003, 0.560901, 0.563834, 0.747081, 0.625857,
0.595276, 0.777684, 0.590151, 0.69453, 0.633242, 0.931138, 0.769924,
0.631699821, 0.631699821, 0.6087145, 0.802057, 0.798386, 0.618904,
0.7378935, 0.676319, 0.918361, 0.603962, 0.681626, 0.837621,
0.8056039, 0.743488, 0.932883, 0.702112, 0.818148, 0.689438,
0.782298, 0.978404, 0.914948, 0.836709, 1.169883, 0.7810633,
0.777799, 0.794372, 0.898033, 0.929143, 1.074654, 0.932979, 0.770593,
1.10043, 0.723556, 1.27119, 1.88797, 1.37911, 1.45686), tau_best = c(0.2110982,
0.364099, 0.379388, 0.409157, 0.447038, 0.449505, 0.452729, 0.460238,
0.461154, 0.4969422, 0.513969, 0.535236, 0.535671, 0.536698,
0.537636, 0.537947, 0.545593, 0.550853, 0.58209, 0.58465, 0.589032,
0.595738, 0.595738, 0.601205, 0.608763, 0.612224, 0.621086, 0.62365,
0.631127, 0.666315, 0.66755, 0.677063, 0.706598, 0.7102644, 0.713479,
0.719708, 0.747621, 0.751698, 0.753327, 0.7644505, 0.7648203,
0.767414, 0.780721, 0.792044, 0.7953055, 0.811148, 0.83448, 0.879949,
0.918185, 0.918586, 0.93674, 0.942806, 0.955041, 1.09869, 1.22448,
1.25339, 1.41392, 1.43079)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -58L))
@grosscol
Copy link
Author

Only issue I had trying to build myself a reproducible example is that I have no idea where TauRef comes from, and that will have to be passed in to the outer calculation to get used in the inner calculation if it's some lookup table or something.

@mthompson89
Copy link

ok so this all looks good, but I think the only issue is the duration and distance intervals. what that section of the function is doing is yall_dur and yall_dis are doing is organizing the data of all species into time and distance bins. and it is visit specific (i.e. hermit thrush at stand 1, point 3 visit 2 has a different count in each time interval than hermit thrush at stand 1, point 3, visit 1) so the calc_stand_mden needs to be worked into the calc_mden. give me a few minutes to try and work it out on my own.

@grosscol
Copy link
Author

@mthompson89 I updated the lapply call of the inner calculation. I realized I had left off passing in the intervals. Also, added in the passed in tau lookup table.

@mthompson89
Copy link

oh sick. yes, after playing around I think this will work beautifully, standby

@mthompson89
Copy link

@grosscol it doesn't look like it is seing the tau_ref argument. I made a sample dataset that you can try to run. is there a way to upload it to this github?

@mthompson89
Copy link

I think I fixed that issue by adding tau_ref to the make_stand_args() function

make_stand_args <- function(stand_id, tau_ref, data){
  splist <- data %>%
    filter(StandID == stand_id) %>%
    dplyr::select(Species) %>%
    distinct()
  
  spdata <- data %>%
    filter(StandID == stand_id)
  
  result <- list(stand_id     = stand_id,
                 species_list = splist,
                 stand_data   = spdata,
                 tau_ref      = tau_ref)
  return(result)
}

@grosscol
Copy link
Author

grosscol commented Oct 20, 2021

Good catch. The invariant argument would need to be added to splat_wrapper, and the static args added to the dynamic ones. Derp.

Adding it to the args list works. I usually keep those args lists for things that change with each call invocation, but in this case I think your way is simpler.

As for uploading a sample data set, you could upload the output of dput for a sample of the data as your own gist. The r-help-readme channel probably has a bunch of tips about that.

@mthompson89
Copy link

ok I just uploaded a sample. I am getting an error that says this when you show traceback. it doesn't like the fact that all_dur is a list but if I get stand_calc_mdens to return all_dur and then run it seperately and use as.matrix(all_dur[[species_id]] manually it works fine

Error in h(simpleError(msg, call)) : 
error in evaluating the argument 'x' in selecting a method for function 'as.matrix': invalid subscript type 'list'
9.
h(simpleError(msg, call))
8.
.handleSimpleError(function (cond) 
.Internal(C_tryCatchHelper(addr, 1L, cond)), "invalid subscript type 'list'", 
base::quote(dur_intervals[[species_id]]))
7.
as.matrix(dur_intervals[[species_id]])
6.
FUN(X[[i]], ...)
5.
lapply(X = species_list, FUN = calc_mden, dur_intervals = all_dur, 
dis_intervals = all_dis, tau_ref = tau_ref)
4.
(function (stand_id, species_list, stand_data, tau_ref) 
{
all_dur <- Xtab(~VisitID + Dur + Species$Species, stand_data)
all_dis <- Xtab(~VisitID + Dis + Species$Species, stand_data) ...
3.
do.call(what = calc_stand_mdens, args = args)
2.
FUN(X[[i]], ...)
1.
lapply(X = stand_args, FUN = splat_wrapper)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment