Skip to content

@noamross /fishlakes.csv
Created

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
GAMs using mgcv and amphibian presence/absence dataset
We can make this file beautiful and searchable if this error is corrected: It looks like row 2 should actually have 1 column, instead of 35.
#Data provided for purpose of R example only. Not for other use without permission. Contact Rosemary Hartman (rosehartman at gmail dot com.)
lake.ID,name,treatment,shorline,sizehectars,elevation,fish.y.n,fish.breeding,BUBO.breeding,PSRE.breeding,TAGR.present,RACA.present,RACA.breeding,adult.RACA,YOPY.RACA,meta.RACA,RACA.eggs,RACA.larvae,veg.extent,transect.with.veg,ave.0.1m,ave.0.5m,ave.1m,bank.slope,pH,LGWD,SMWD,silt.0.1,silt.total,area.10cm,vegetated.area,near.type,count.RACApops.1K,count.lakes1K,weighted.dist
533,East Boulder,Co,1356.4,11.4,2034.8448,y,y,y,y,n,y,y,6,2,0,0,350,2.28,0.292,0.722916667,5.016666667,10.68333333,0.092475736,8.1,0,0.0417,0.376,0.3472,980.56,3092.59,RACA,5,10,0.8268
534,Mosquito,Co,555.65,2.221,2017,y,y,y,y,n,y,y,2,0,0,0,70,0.425,0.292,0.520833333,4.291666667,14.2875,0.067204288,9,0.25,0.478,0.375,0.6806,289.4,236.15,RACA,0,2,0.52
537,Upper Boulder,Co,530.48,2.009,1927,y,y,y,y,n,y,y,3,5,0,0,240,1.17,0.875,0.46,5.125,14.11666667,0.068711911,8.3,0.04,0,0.875,0.9306,244.02,620.66,FISH,4,9,0.8312
539,Big Marshy,Co,667.29,2.813,1911,y,y,y,y,n,y,y,3,2,0,0,180,0.517,0.826,0.23,1.26,3,0.331579018,8.5,0.26,0.3333,0.83333,0.5278,153.48,344.99,RACA,2,4,0.563
543,Little Marshy,Co,339.29,0.661,1875,y,y,y,y,y,y,y,2,0,0,0,840,0.27,0.875,0.18,2.15,4.142857143,0.233320289,8.1,0.67,0.75,1,1,61.07,91.61,CO,3,4,0.5114
544,Middle Boulder,Co,604.369,2.493,1984.8576,y,y,y,n,n,y,y,7,2,0,0,8000,2.48,0.636,0.327272727,3.313636364,11.25,0.08493805,8.5,0.318,0.54545,0.6364,0.81818,197.79,1498.84,CO,3,7,0.9013
554,Telephone,Co,534.796,1.493,2098.8528,y,n,y,y,n,y,y,8,0,0,0,20,0.292,0.291,0.333333333,1.825,3.9125,0.254833499,8,0,0.4583,0.3333,0.29167,178.27,156.16,CO,4,5,0.4198
561,Virginia,Co,453.892,1.43,2008.0224,y,y,n,n,n,y,y,7,0,0,0,350,1.2,0.708,0.333333333,3.6125,7.095833333,0.136797022,7.4,2.71,0.875,0.9167,0.8472,151.3,544.67,FISH,0,1,0.4821
574,Fish,Co,525.934,1.558,1821.18,y,y,y,n,n,y,y,3,5,0,0,367,6.36,1,0.583333333,6.5625,NA,0.07240347,6.2,0.167,0.217,1,1,306.79,3344.94,RACA,1,2,1.0843
609,Rush Creek,Co,453.08,1.311,2013,y,y,y,y,n,y,y,5,0,0,1,100,1.45,0.9583,0.54,3.995833333,7.038095238,0.13807655,7.45,1.79,0.75,1,0.97222,244.67,656.97,RACA,0,0,0
612,McDonald,Co,714.23,2.77,1797,y,y,n,y,n,y,y,6,0,0,0,57,2.54,0.8333,0.73,3.11,6.09,0.165445662,7.1,1.87,0.958,0.875,0.8611,521.39,1814.15,CO,1,6,0.1235
681,"Boulder Lake, Big",Co,654.43,2.642,1850,y,y,y,y,y,y,n,4,0,1,0,0,0.895,0.571,0.252,4.305,10.21,0.094854622,7.3,2.048,0.7619,0.857,0.9206,164.92,585.72,FISH,3,7,0.5311
682,Conway,Co,217.701,0.283,2078.736,y,y,n,n,n,y,y,2,0,0,0,11,1.426,0.842,0.632,3.247368421,9.331578947,0.105423578,7.6,1.053,0.4737,0.8421,0.8596,137.59,310.44,FISH,1,16,0.0902
688,Union,Co,666.691,1.49,1837.3344,y,y,n,y,n,y,y,9,3,2,0,1,0.654,0.583,2,10,20,0.05,9.2,2.5,0.6667,0.7083,0.8056,500,436.02,RACA,2,4,1.091
722,Salmon,Co,490.09,0.658,2179,y,y,y,y,n,y,y,6,1,0,4,200,1.29,0.895,0.58,2.17,3.8,0.265192487,6.6,1.29,0.3158,0.9474,0.8567,284.25,632.21,RACA,1,11,0.1394
732,El,Co,1071.03,2.259,1990,y,y,y,y,n,y,y,5,0,2,0,635,0.22,0.2,0.49,5.18,5.26,0.141795358,7.1,4.35,0.8,0.9,0.867,524.8,235.63,FISH,0,4,0.2779
531,Mill Creek,fish,377.96,0.713,2008.9368,y,n,y,n,n,y,n,1,0,0,0,0,0.691,0.682,0.295,1.386363636,3.25,0.308095216,8.3,0.591,0.636,0.6818,0.71212,111.5,261.17,RACA,3,4,0.5538
553,Fox Creek,fish,661.25,2.791,2003,y,y,n,y,n,y,n,3,0,0,0,0,2.16,0.542,0.46,6.045833333,10.31363636,0.091902485,7.2,1.04,0.667,0.8333,0.90278,304.17,1428.3,FISH,3,4,0.7386
555,Mavis,fish,502.95,1.721,2048,y,n,n,y,n,y,n,3,1,0,0,0,1.8,0.565,0.37,3.186956522,6.37826087,0.153416601,7.05,2.87,1,0.4782,0.6956,186.09,905.31,RACA,2,2,0.6144
556,West Boulder,fish,552.5,2.053,2122.3224,y,n,y,y,n,n,n,0,0,0,0,0,0.2,0.25,0.25,1.108333333,2.525,0.398011014,7.7,0.583,0.833,0.1667,0.1691,138.12,110.5,RACA,2,2,0.5504
563,Tangle Blue,fish,870.27,4.534,1751,y,y,y,y,y,y,n,1,0,0,0,0,2.08,0.833,0.358,2.43,4.48,0.218926013,7.7,0.375,0.583,0.9583,0.8889,311.56,1810.16,RACA,0,0,0.5399
573,Twin 1,fish,261.673,0.415,1967.1792,y,y,y,y,n,y,n,1,0,0,0,0,4.43,0.857,1.2,4.007142857,21.275,0.043439116,8.6,4.43,0.1429,1,1,314.01,1159.21,RACA,1,5,0.44
576,Long Gulch,fish,1021.974,7.362,1952.5488,y,y,y,n,n,n,n,0,0,0,0,0,0.9125,0.667,0.366666667,1.7,3.425,0.293120249,7.1,3.29,1,0.583,0.513,374.72,932.55,RACA,0,0,0.1676
588,Trail Gulch,fish,983.875,6.777,1961.6928,y,y,y,n,n,n,n,0,0,0,0,0,0.45,0.4583,0.329166667,1.454166667,3.179166667,0.316543667,8.1,1.79,0.875,0.54166,0.4167,323.86,442.74,RACA,0,0,0.6774
593,Granite,fish,594.77,2.297,1964.7408,y,y,n,n,n,n,n,0,0,0,0,0,0,0,0.220833333,1.5875,2.804166667,0.347121369,8.1,1.71,0.9583,0.79167,0.875,131.35,0,RACA,0,0,0.5445
601,Doe,fish,543.717,1.672,2131.1616,y,y,y,n,n,n,n,0,0,0,0,0,0,0,0.170833333,1.683333333,3.4,0.286799343,7.7,2.21,0.91666,0.7917,0.69444,92.88,0,RACA,0,0,0.5531
611,Stoddard,fish,1349.484,12.69,1777.5936,y,n,n,y,n,y,n,1,2,0,0,0,0.492,0.7917,0.470833333,2.591666667,4.670833333,0.21156318,8.1,0.583,0.5417,0.333,0.5278,635.38,663.95,CO,2,6,0.2882
666,Sugar Pine,fish,638.939,2.474,2005.2792,y,y,n,n,y,n,n,0,0,0,0,0,0.192,0.0833,0.220833333,1.504166667,2.916666667,0.337661923,8,0.792,0.7083,0.4583,0.5139,141.1,122.68,FISH,1,2,0.1666
683,Foster,fish,680.13,2.662,2208.276,y,y,n,n,n,y,n,1,0,0,0,0,0,0,0.3625,3.041666667,5.245833333,0.183714243,8.2,2.333,0.833,0.125,0.4028,246.55,0,FISH,1,13,0.1753
684,Lion,fish,471.651,1.544,2132.3808,y,y,n,n,n,y,n,1,0,0,0,0,0.129,0.5417,0.145833333,0.75,1.883333333,0.527896107,8.5,0.5,0.583,0.625,0.444,68.78,60.84,CO,2,18,0.1694
686,Little Boulder,fish,531.091,1.897,1925.7264,y,n,n,n,n,n,n,0,0,0,0,0,1.181,0.857,0.219047619,1.338095238,2.733333333,0.362842014,8.7,1.19,0.762,0.619,0.571,116.33,627.22,FISH,1,3,0.4532
700,Lilypad,fish,349.108,0.846,1915.3632,y,y,y,n,n,n,n,0,0,0,0,0,1.65,0.789,0.705263158,3.636842105,12.67894737,0.076035504,8.1,0.737,0.526,1,1,246.21,576.03,RACA,1,6,0.25
727,Twin 2,fish,238.123,0.319,1549.908,y,y,n,y,y,y,n,1,0,0,0,0,0.281,0.25,0.075,2.1375,11.30625,0.080434362,8.3,2.125,0.75,1,1,17.86,66.91,RACA,1,4,0.34
760,Luella,fish,357,0.942,2110.1304,y,y,y,y,n,y,n,3,0,0,0,0,0.025,0.0833,0.066666667,0.766666667,2.020833333,0.480850091,9.6,0.583,0.7917,0.5417,0.36111,23.8,8.92,FISH,2,7,0.16
765,Forbidden 1,fish,453.246,0.773,1869.948,y,y,n,y,n,n,n,0,0,0,0,0,2.38,0.6,0.645,4.545,3.44,0.162730513,7.3,0.2,0.45,0.6,0.63333,292.34,1078.73,RACA,6,10,0.5672
767,Forbidden 2,fish,272.539,0.355,1875.1296,y,y,n,n,n,y,n,2,0,0,0,0,0.141,0.3182,0.186363636,0.977272727,1.881818182,0.529263988,NA,0.545,0.409,0.4545,0.363636,50.79,38.43,FISH,4,7,0.54
770,Deer,fish,440.21,1.314,2176,y,y,y,n,n,n,n,5,0,0,0,0,0.48,0.625,0.29,1.23,2.73,0.369188018,8.7,0.04,0.0833,0.875,0.75,127.66,211.3,FISH,1,4,0.3665
771,Diamond,fish,372.57,0.949,2200,y,y,n,n,n,n,n,0,0,0,0,0,0.7,0.217,0.02,0.49,1.73,0.546235472,8.85,0.04,0.087,0.8696,0.8116,7.45,260.8,RACA,1,6,0.33
773,Summit,fish,738.19,3.636,2300.0208,y,y,n,n,n,n,n,0,0,0,0,0,0,0,0.2,1.2,2.616666667,0.379639963,8.6,0.083,0.2083,0,0,147.64,0,FISH,2,4,0.428
26177,South Fork 1,fish,529.882,1.823,2041.5504,y,y,n,n,y,n,n,0,0,0,0,0,0.191,0.45833,0.6,3.341666667,5.8625,0.167922684,8.6,1.416,1,0.4583,0.5556,317.93,101.21,FISH,0,4,0
26184,U Boulder Pond 1,fish,243.28,0.441,2072.64,y,y,n,n,n,y,n,4,1,0,0,0,0.742,0.917,0.441666667,1.891666667,4.025,0.250498382,8.3,0.083,0.4167,0.9167,0.8889,107.45,180.51,FISH,5,10,0.5
26187,M Boulder Pond 1,fish,234.927,0.375,2133.6,y,y,n,n,n,y,n,1,0,0,0,0,0.575,1,0.15,1.25,3.233333333,0.302735107,9.1,0.25,0.5,1,0.8056,35.24,135.08,FISH,2,5,0.482
## GAMs using mgcv and amphibian presence/absence dataset
## code and data by Rosemary Hartman (rosehartman at gmail dot com)
## First, load the mgcv package (Simon Wood)
library(mgcv)
## Now, attach your data set (or my data set)
fishlakes <- read.csv("fishlakes.csv", comment.char="#")
## create a full exploritory model using varibles you think are biologically
## significant (this is pretty subjective, and where you need to use your brain)
## For my data set, I am trying to predict the presence of cascades frogs
fishlakes.logr <- gam(treatment ~ ## "treatment" = breeding cascades frogs in lake
BUBO.breeding + ## breeding toads in lake
count.RACApops.1K + ## number of cascades frog populations within 1km
s(silt.total, k=6) + ## amount of silt in lake bed
s(vegetated.area, k=7) + ## total lake area with emergent aquatic vegetation
s(area.10cm, k=6) + ## total lake area less than 10cm deep
s(weighted.dist, k=6) + ## perportion of nearby water bodies with frogs weighted by distance
s(bank.slope, k=6), ## average slope
family=binomial("logit"), data= fishlakes) ## this is a logistic function since I have binomial data
summary(fishlakes.logr) ## Note, this model explains 68.5% of devience
gam.check(fishlakes.logr) ## It seems to think my ks are too low, but raising them
## gives huge overspecification. I'm not sure what to do about this.
plot(fishlakes.logr) ## diagnostic plots for the smooths
## To choose my k's, I looped through a bunch of different values for k, and basically
## just played around until my model stopped overspecifying
for (i in 2:8) { fishlakes.logr <- gam(treatment ~ count.RACApops.1K + BUBO.breeding + s(silt.total, k=i) + s(vegetated.area, k=i) + s(bank.slope, k=i) + s(area.10cm, k=i) + s(weighted.dist, k=i), family=binomial("logit"), data= fishlakes);
x <- summary(fishlakes.logr);
y <- AIC(fishlakes.logr);
print(i)
print(x)
print(y)
}
for (i in 2:10) { fishlakes.logr <- gam(treatment ~ count.RACApops.1K + BUBO.breeding + s(silt.total, k=i) + s(vegetated.area, k=7) + s(bank.slope, k=6) + s(area.10cm, k=6) + s(weighted.dist, k=6), family=binomial("logit"), data= fishlakes);
x <- summary(fishlakes.logr);
y <- AIC(fishlakes.logr);
print(i)
print(x)
print(y)
}
## In order to see which of the candidate terms are significant, I will run the model
## several times, dropping one term out each time and then do a likelyhood ratio test
## to see if adding the term leads to a significant improvement in residual deviance.
## fishlakes2.logr is the same model as fishlakes.logr without the "bank slope" term
fishlakes2.logr <- gam(treatment ~ BUBO.breeding + count.RACApops.1K +
s(silt.total, k=6) + s(vegetated.area, k=7) + s(area.10cm, k=6) + s(weighted.dist, k=6),
family=binomial("logit"), data= fishlakes)
summary(fishlakes2.logr) ## This model only explains 34.1% of deviance
anova(fishlakes2.logr, fishlakes.logr, test = "LRT") ## There is a significant difference
## in explanitory power between the two models, that means the "bank slope" term
## should be included in the final model
## Now do the same with the rest of the varibles
fishlakes3.logr <- gam(treatment ~ BUBO.breeding + count.RACApops.1K +s(silt.total, k=6)
+ s(vegetated.area, k=7) + s(bank.slope, k=6) + s(area.10cm, k=6),
family=binomial("logit"), data= fishlakes) ## all variables except "weighted distance"
summary(fishlakes3.logr) ## This model is way overspecified (100% of devience explained)
anova(fishlakes3.logr, fishlakes.logr, test = "LRT") ## significant differnce!
## Include "weighted distance" in final model
fishlakes4.logr <- gam(treatment ~ BUBO.breeding +count.RACApops.1K+
s(silt.total, k=6) + s(vegetated.area, k=7) +
s(bank.slope, k=6) + s(weighted.dist, k=6),
family=binomial("logit"), data= fishlakes) ## all variables except area under 10cm
summary(fishlakes4.logr) ## This model only explains 54.5% of deviance
anova(fishlakes4.logr, fishlakes.logr, test = "LRT") ## the difference is not
## significant, leave "area under 10cm" out of final model
fishlakes5.logr <- gam(treatment ~ BUBO.breeding +
s(silt.total, k=6) + s(vegetated.area, k=7)+ s(area.10cm, k=6) +
s(weighted.dist, k=6), family=binomial("logit"), data= fishlakes) ## all variables except number
## of frog populations within 1km
summary(fishlakes5.logr) ## This model only explains 31.8% of deviance
anova(fishlakes5.logr, fishlakes.logr, test = "LRT") ## significant differnce!
## Include "count RACA pops" in final model
fishlakes6.logr <-gam(treatment ~ BUBO.breeding + count.RACApops.1K+
s(silt.total, k=6) + s(bank.slope, k=6) + s(area.10cm, k=6) + s(weighted.dist, k=6),
family=binomial("logit"), data= fishlakes) ## all varibles except vegetated area
summary(fishlakes6.logr) ## This model explains 61.4% of deviance
anova(fishlakes6.logr, fishlakes.logr, test = "LRT") ## Only marginally significant
## maybe drop vegetated area out of final model
fishlakes7.logr <- gam(treatment ~ BUBO.breeding + count.RACApops.1K+ s(vegetated.area, k=7) + s(bank.slope, k=6) + s(area.10cm, k=6) +
s(weighted.dist, k=6), family=binomial("logit"), data= fishlakes) ## all variables except silt
summary(fishlakes7.logr) ## This model explains 63.3% of deviance
anova(fishlakes7.logr, fishlakes.logr, test = "LRT") ## No difference. Definitely
## drop silt out of final model
fishlakes8.logr <- gam(treatment ~ count.RACApops.1K+ s(vegetated.area, k=7) + s(silt.total, k=6) +
s(bank.slope, k=5) + s(area.10cm, k=5) + s(weighted.dist, k=5),
family=binomial("logit"), data= fishlakes) ## all variables except toads
summary(fishlakes8.logr) ## This model only explains 49.7% of deviance
anova(fishlakes8.logr, fishlakes.logr, test = "LRT") ## significant differnce!
## Include breeding toads in final model
## Now that we know what variables we are using, we need to figure out how to put them all in
## a useful model.
fish1 <- gam(treatment ~ BUBO.breeding + count.RACApops.1K + s(bank.slope, k=6) + s(vegetated.area, k=7) +
s(weighted.dist, k=6), family=binomial("logit"), data= fishlakes)
summary(fish1)
## Look at AICc of the models we have so far
library("bbmle")
AICctab(fishlakes.logr, fishlakes2.logr, fishlakes4.logr, fishlakes5.logr, fishlakes6.logr, fishlakes7.logr, fishlakes8.logr, fish1, base = TRUE, weights = TRUE, nobs=nrow(fishlakes))
## Now we can come up with more models, look for interactions, cross-validate the
## best ones we have so far, or just give up.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.