# Data and code for EcoLetts submission
require(AICcmodavg)
require(mgcv)
require(boot)
require(car)
require(MASS)
require(repmis)
require(pscl)
#Read in data from my public dropbox folder
dropURL <- "https://dl.dropboxusercontent.com/u/26280366/Papers/EcoLetts/AreaNrgPseudoFinal.csv"
dat <- repmis::source_data(dropURL,sep = ",",header = TRUE)[,-1]
################################
# Richness Poisson regressions #
################################
Cand.models <- list( )
Cand.models[[1]] <- glm(Richness ~ Time, data = dat, family = "poisson")
Cand.models[[2]] <- glm(Richness ~ Area, data = dat, family = "poisson")
Cand.models[[3]] <- glm(Richness ~ NRG , data = dat, family = "poisson")
Cand.models[[4]] <- glm(Richness ~ AreaNRG, data = dat, family = "poisson")
Cand.models[[5]] <- glm(Richness ~ TimeArea, data = dat, family = "poisson")
Cand.models[[6]] <- glm(Richness ~ TimeNRG, data = dat, family = "poisson")
Cand.models[[7]] <- glm(Richness ~ TimeAreaNRG, data = dat, family = "poisson")
Cand.models[[8]] <- glm(Richness ~ cvArea, data = dat, family = "poisson")
Cand.models[[9]] <- glm(Richness ~ cvNRG , data = dat, family = "poisson")
Cand.models[[10]] <- glm(Richness ~ cvArea + cvNRG, data = dat, family = "poisson")
Cand.models[[11]] <- glm(Richness ~ TimeNRG + cvArea , data = dat, family = "poisson")
Cand.models[[12]] <- glm(Richness ~ TimeAreaNRG + cvArea , data = dat, family = "poisson")
Cand.models[[13]] <- glm(Richness ~ 1, data = dat, family = "poisson")
##create a vector of names to trace back models in set
Modnames <- paste("mod", 1:length(Cand.models), sep = " ")
##generate AICc table
tab1 = aictab(cand.set = Cand.models, modnames = Modnames, sort = FALSE)
##generate pseudo R-squared values
rsq = c()
for (i in 1:length(Cand.models)){
rsq[i] = 1- (Cand.models[[i]]$deviance / Cand.models[[i]]$null.deviance)
}
data.frame(tab1$Modnames, tab1$K, tab1$Delta_AICc, rsq)
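## Optional check, not part of the original model set: the Pearson dispersion
## statistic for one of the larger Poisson GLMs (mod 12, Richness ~ TimeAreaNRG + cvArea).
## A ratio well above 1 would point toward overdispersion (quasi-Poisson or negative binomial).
sum(residuals(Cand.models[[12]], type = "pearson")^2) / df.residual(Cand.models[[12]])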
####################################
# Richness Poisson GAM regressions #
####################################
Cand.models <- list( )
Cand.models[[1]] <- gam(Richness ~ s(Time, k = 3), data = dat, family = "poisson")
Cand.models[[2]] <- gam(Richness ~ s(Area, k = 3), data = dat, family = "poisson")
Cand.models[[3]] <- gam(Richness ~ s(NRG , k = 3), data = dat, family = "poisson")
Cand.models[[4]] <- gam(Richness ~ s(AreaNRG, k = 5), data = dat, family = "poisson")
Cand.models[[5]] <- gam(Richness ~ s(TimeArea, k = 5), data = dat, family = "poisson")
Cand.models[[6]] <- gam(Richness ~ s(TimeNRG, k = 5), data = dat, family = "poisson")
Cand.models[[7]] <- gam(Richness ~ s(TimeAreaNRG, k = 5), data = dat, family = "poisson")
Cand.models[[8]] <- gam(Richness ~ s(cvArea, k = 5), data = dat, family = "poisson")
Cand.models[[9]] <- gam(Richness ~ s(cvNRG , k = 5), data = dat, family = "poisson")
Cand.models[[10]] <- gam(Richness ~ s(cvArea, k =5) + s(cvNRG, k = 5), data = dat, family = "poisson")
Cand.models[[11]] <- gam(Richness ~ s(TimeNRG, k = 5) + s(cvArea, k = 5) , data = dat, family = "poisson")
Cand.models[[12]] <- gam(Richness ~ s(TimeAreaNRG, k = 5) + s(cvArea, k = 5) , data = dat, family = "poisson")
Cand.models[[13]] <- gam(Richness ~ 1, data = dat, family = "poisson")
##create a vector of names to trace back models in set
modnames <- paste("mod", 1:length(Cand.models), sep = " ")
##generate AICc & pseudo R-squared table
rsq = c()
AICC = c()
for (i in 1:length(Cand.models)){
rsq[i] = summary(Cand.models[[i]])$r.sq
AICC[i] = AICc(Cand.models[[i]])
}
data.frame(modnames, rsq, AICC - min(AICC))
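## Optional diagnostic, not in the original script: mgcv's gam.check() on the
## lowest-AICc GAM reports the k-index / basis-dimension checks for the smooths
## (k = 3 or 5 above), along with the usual residual plots.
par(mfrow = c(2, 2))
gam.check(Cand.models[[which.min(AICC)]])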
################################
# Diversity linear regressions #
################################
Cand.models <- list( )
Cand.models[[1]] <- lm(car::logit(DiversityC) ~ Time, data = dat)
Cand.models[[2]] <- lm(car::logit(DiversityC) ~ Area, data = dat)
Cand.models[[3]] <- lm(car::logit(DiversityC) ~ NRG , data = dat)
Cand.models[[4]] <- lm(car::logit(DiversityC) ~ AreaNRG, data = dat)
Cand.models[[5]] <- lm(car::logit(DiversityC) ~ TimeArea, data = dat)
Cand.models[[6]] <- lm(car::logit(DiversityC) ~ TimeNRG, data = dat)
Cand.models[[7]] <- lm(car::logit(DiversityC) ~ TimeAreaNRG, data = dat)
Cand.models[[8]] <- lm(car::logit(DiversityC) ~ cvArea, data = dat)
Cand.models[[9]] <- lm(car::logit(DiversityC) ~ cvNRG , data = dat)
Cand.models[[10]] <- lm(car::logit(DiversityC) ~ cvArea + cvNRG, data = dat)
Cand.models[[11]] <- lm(car::logit(DiversityC) ~ TimeNRG + cvArea , data = dat)
Cand.models[[12]] <- lm(car::logit(DiversityC) ~ TimeAreaNRG + cvArea, data = dat)
Cand.models[[13]] <- lm(car::logit(DiversityC) ~ 1, data = dat)
##create a vector of names to trace back models in set
Modnames <- paste("mod", 1:length(Cand.models), sep = " ")
##generate AICc table
tab1 = aictab(cand.set = Cand.models, modnames = Modnames, sort = FALSE)
##generate R-squared values
rsq = c()
for (i in 1:length(Cand.models)){
rsq[i] = summary(Cand.models[[i]])$r.squared
}
data.frame(tab1$Modnames, tab1$K, tab1$Delta_AICc, rsq)
######################################
# Diversity Gaussian GAM regressions #
######################################
Cand.models <- list( )
Cand.models[[1]] <- gam(car::logit(DiversityC) ~ s(Time, k = 3), data = dat)
Cand.models[[2]] <- gam(car::logit(DiversityC) ~ s(Area, k = 3), data = dat)
Cand.models[[3]] <- gam(car::logit(DiversityC) ~ s(NRG , k = 3), data = dat)
Cand.models[[4]] <- gam(car::logit(DiversityC) ~ s(AreaNRG, k = 5), data = dat)
Cand.models[[5]] <- gam(car::logit(DiversityC) ~ s(TimeArea, k = 5), data = dat)
Cand.models[[6]] <- gam(car::logit(DiversityC) ~ s(TimeNRG, k = 5), data = dat)
Cand.models[[7]] <- gam(car::logit(DiversityC) ~ s(TimeAreaNRG, k = 5), data = dat)
Cand.models[[8]] <- gam(car::logit(DiversityC) ~ s(cvArea, k = 5), data = dat)
Cand.models[[9]] <- gam(car::logit(DiversityC) ~ s(cvNRG , k = 5), data = dat)
Cand.models[[10]] <- gam(car::logit(DiversityC) ~ s(cvArea, k =5) + s(cvNRG, k = 5), data = dat)
Cand.models[[11]] <- gam(car::logit(DiversityC) ~ s(TimeNRG, k = 5) + s(cvArea, k = 5) , data = dat)
Cand.models[[12]] <- gam(car::logit(DiversityC) ~ s(TimeAreaNRG, k = 5) + s(cvArea, k = 5), data = dat)
Cand.models[[13]] <- gam(car::logit(DiversityC) ~ s(TimeNRG, k = 5) + s(Time, k = 3) + s(NRG, k = 3), data = dat)
Cand.models[[14]] <- gam(car::logit(DiversityC) ~ s(Time, k = 3) + s(NRG, k = 3), data = dat)
Cand.models[[15]] <- gam(car::logit(DiversityC) ~ 1, data = dat)
##create a vector of names to trace back models in set
modnames <- paste("mod", 1:length(Cand.models), sep = " ")
##generate AICc & pseudo R-squared table
rsq = c()
AICC = c()
for (i in 1:length(Cand.models)){
rsq[i] = summary(Cand.models[[i]])$r.sq
AICC[i] = AICc(Cand.models[[i]])
}
data.frame(modnames, rsq, AICC - min(AICC))
######################################################################
################# GENERATE PREDICTED VALUES ######################
######################################################################
mod_gam1 <- gam(car::logit(DiversityC) ~ s(TimeNRG, k = 5) + cvArea, data = dat)
mod_gam2 <- gam(Richness ~ s(TimeNRG, k = 5) + cvArea, family = "poisson", data = dat)
mod_gam3 <- gam(car::logit(DiversityC) ~ s(NRG, k = 3) + cvArea, data = dat)
mod_gam4 <- gam(Richness ~ s(NRG, k = 3) + cvArea, family = "poisson", data = dat)
values1 <- dat$TimeNRG
values2 <- dat$cvArea
values3 <- dat$NRG
Counts.exponential1 <- (predict(mod_gam1,list(TimeNRG = values1, cvArea = values2)))
Counts.exponential2 <- exp(predict(mod_gam2,list(TimeNRG = values1, cvArea = values2)))
Counts.exponential3 <- (predict(mod_gam3,list(NRG = values3, cvArea = values2)))
Counts.exponential4 <- exp(predict(mod_gam4,list(NRG = values3, cvArea = values2)))
summary(lm(Counts.exponential1 ~ car::logit(dat$DiversityC)))$adj.r.squared
summary(lm(Counts.exponential2 ~ dat$Richness))$adj.r.squared
summary(lm(Counts.exponential3 ~ car::logit(dat$DiversityC)))$adj.r.squared
summary(lm(Counts.exponential4 ~ dat$Richness))$adj.r.squared
##################################################################
##### NESTED MODELS FOR TIME + PRODUCTIVITY + TIMEPRODUCTIVITY ###
##################################################################
mod1 = lm(car::logit(DiversityC) ~ Time + finalNRG + TimeNRG, data = dat)
mod2 = glm(Richness ~ Time + finalNRG + TimeNRG, data = dat, family = "poisson")
summary(mod1)
summary(mod2)
#and the pseudo R-squared for the GLM fit
1- (mod2$deviance / mod2$null.deviance)
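## Optional follow-up, not in the original script: sequential tests for the
## nested Time / NRG / interaction terms (F tests for the linear model,
## likelihood-ratio chi-square tests for the Poisson GLM).
anova(mod1)
anova(mod2, test = "Chisq")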
########################################################
##### STATISTICS FOR EQUILIBRIUM FOLLOW-UP EXPERIMENT ##
########################################################
dropURLnew <- "https://dl.dropboxusercontent.com/u/26280366/Papers/EcoLetts/PseudoExtinctionDebtNew.csv"
newdat <- repmis::source_data(dropURLnew,sep = ",",header = TRUE)
# Diversity regressions
ylow = subset(car::logit(newdat$Diversity), newdat$Color == "green")
xlow = subset(newdat$Age, newdat$Color == "green")
ymed = subset(car::logit(newdat$Diversity), newdat$Color == "blue")
xmed = subset(newdat$Age, newdat$Color == "blue")
yhi = subset(car::logit(newdat$Diversity), newdat$Color == "red")
xhi = subset(newdat$Age, newdat$Color == "red")
ypost = subset(car::logit(newdat$Diversity), newdat$Color == "black")
xpost = subset(newdat$Age, newdat$Color == "black")
lowmod1 = gam(ylow~s(xlow, k = 3))
lowmodNull = gam(ylow~1)
medmod1 = gam(ymed~s(xmed, k = 3))
medmodNull = gam(ymed~1)
himod1 = gam(yhi~s(xhi, k = 3))
himodNull = gam(yhi~1)
postmod1 = gam(ypost~xpost)
postmodNull = gam(ypost~1)
# Diversity Chi-Square tests
anova(lowmod1, lowmodNull, test = "Chisq")
anova(medmod1, medmodNull, test = "Chisq")
anova(himod1, himodNull, test = "Chisq")
anova(postmod1, postmodNull, test = "Chisq")
# Richness regressions
ylow = subset(newdat$Richness, newdat$Color == "green")
xlow = subset(newdat$Age, newdat$Color == "green")
ymed = subset(newdat$Richness, newdat$Color == "blue")
xmed = subset(newdat$Age, newdat$Color == "blue")
yhi = subset(newdat$Richness, newdat$Color == "red")
xhi = subset(newdat$Age, newdat$Color == "red")
ypost = subset(newdat$Richness, newdat$Color == "black")
xpost = subset(newdat$Age, newdat$Color == "black")
lowmod1 = gam(ylow~s(xlow, k = 3), family = "poisson")
lowmodNull = gam(ylow~1, family = "poisson")
medmod1 = gam(ymed~s(xmed, k = 3), family = "poisson")
medmodNull = gam(ymed~1, family = "poisson")
himod1 = gam(yhi~s(xhi, k = 3), family = "poisson")
himodNull = gam(yhi~1, family = "poisson")
postmod1 = gam(ypost~s(xpost, k = 3), family = "poisson")
postmodNull = gam(ypost~1, family = "poisson")
# Richness Chi-square tests
anova(lowmod1, lowmodNull, test = "Chisq")
anova(medmod1, medmodNull, test = "Chisq")
anova(himod1, himodNull, test = "Chisq")
anova(postmod1, postmodNull, test = "Chisq")
##################################
## ZERO-INFLATION ISSUES ##
##################################
hist(dat$Richness-1)
hist(dat$DiversityC)
# For Richness, I compare the AIC values of four different count models:
# the negative binomial, the Poisson (used above), the hurdle Poisson, and the zero-inflated Poisson
fm_negbin <- glm.nb(Richness-1 ~ TimeNRG + cvArea, data = dat, trace = T)
fm_pois <- glm(Richness-1 ~ TimeNRG + cvArea, data = dat, family = "poisson")
fm_hurdle <- hurdle(Richness-1 ~ TimeNRG + cvArea, data = dat, dist = "poisson")
fm_zip <- zeroinfl(Richness-1 ~ TimeNRG + cvArea, data = dat, dist = "poisson")
AIC(fm_negbin, fm_pois, fm_hurdle, fm_zip)
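## Quick optional check of apparent zero inflation (not in the original script):
## observed zeros in Richness - 1 versus the number expected under the Poisson fit.
c(observed = sum(dat$Richness - 1 == 0),
  expected = round(sum(dpois(0, lambda = fitted(fm_pois))), 1))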
# Although AIC favors the hurdle model, inspection of the residuals makes me inclined to
# keep the Poisson. Neither set of residuals fits particularly well, and there is no real
# theoretical reason to assume that zero and non-zero values (in terms of new morphotypes
# emerging) result from different processes.
par(mfrow = c(1,2))
qqnorm(residuals(fm_pois), main = "Poisson")
qqline(residuals(fm_pois))
qqnorm(resid(fm_hurdle), main = "Hurdle-Poisson")
qqline(resid(fm_hurdle))
dev.off()
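## Optional, hedged cross-check (not in the original analysis): pscl's Vuong test
## gives a formal comparison of the non-nested Poisson and hurdle fits; read it
## alongside the residual plots above.
vuong(fm_pois, fm_hurdle)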
values1 <- dat$TimeNRG
values2 <- dat$cvArea
Counts.exponentialHurdle <- exp(predict(fm_hurdle,list(TimeNRG = values1, cvArea = values2)))
Counts.exponentialPois <- exp(predict(fm_pois,list(TimeNRG = values1, cvArea = values2)))
# Both the Hurdle and Poisson models predict new values with similar accuracy
summary(lm(Counts.exponentialHurdle +1 ~ dat$Richness))
summary(lm(Counts.exponentialPois +1 ~ dat$Richness))
# But inspection of the predicted values shows that the
# hurdle model tends to vastly overestimate richness (predicted values up to 15!) while the Poisson appears accurate
par(mfrow = c(1,2))
plot(Counts.exponentialHurdle + 1 , dat$Richness, main = "Hurdle", xlab = "Predicted", ylab = "Observed", ylim = c(1,4), xlim = c(1,17))
abline(0,1)
plot(Counts.exponentialPois + 1 , dat$Richness, main = "Poisson", xlab = "Predicted", ylab = "Observed", ylim = c(1,4), xlim = c(1,5))
abline(0,1)
###############################
### LOGIT TRANSFORM ISSUES ####
###############################
# Compare results across different treatments of the Diversity response:
# untransformed (raw), arcsine-square-root, and logit
mod_raw <- gam(DiversityC ~ s(TimeNRG, k= 5) + cvArea , data = dat)
mod_asin <- gam(asin(sqrt(DiversityC)) ~ s(TimeNRG, k = 5) + cvArea , data = dat)
mod_logit <- gam(car::logit(DiversityC) ~ s(TimeNRG, k = 5) + cvArea , data = dat)
#All three versions give qualitatively similar results.
summary(mod_raw)
summary(mod_asin)
summary(mod_logit)
#By plotting the standardized residuals against the theoretical normal quantiles,
#we can see that the logit transform
#fulfills the assumption of normally-distributed error while the other two do not.
par(mfrow = c(3,3))
qqnorm(residuals(mod_raw, type = "scaled.pearson"), ylab="Standardized Residuals", xlab="Normal Scores", main="Raw")
qqline(residuals(mod_raw, type = "scaled.pearson"))
qqnorm(residuals(mod_asin, type = "scaled.pearson"), ylab="Standardized Residuals", xlab="Normal Scores", main="ArcsinSquareroot")
qqline(residuals(mod_asin, type = "scaled.pearson"))
qqnorm(residuals(mod_logit, type = "scaled.pearson"), ylab="Standardized Residuals", xlab="Normal Scores", main="Logit")
qqline(residuals(mod_logit, type = "scaled.pearson"))
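## Optional numerical complement to the QQ plots above (not in the original
## script): Shapiro-Wilk tests on the scaled Pearson residuals of each fit.
shapiro.test(residuals(mod_raw, type = "scaled.pearson"))
shapiro.test(residuals(mod_asin, type = "scaled.pearson"))
shapiro.test(residuals(mod_logit, type = "scaled.pearson"))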
# Plotting the residuals against the predicted values shows that the logit transform minimizes
# heteroskedasticity, and none of the fits show obvious nonlinearity.
plot(residuals(mod_raw)~predict(mod_raw, type = "response"))
lines(smooth.spline(residuals(mod_raw)~predict(mod_raw, type = "response"), spar = 1), col = "blue")
plot(residuals(mod_asin)~predict(mod_asin, type = "response"))
lines(smooth.spline(residuals(mod_asin)~predict(mod_asin, type = "response"), spar = 1), col = "blue")
plot(residuals(mod_logit)~predict(mod_logit, type = "response"))
lines(smooth.spline(residuals(mod_logit)~predict(mod_logit, type = "response"), spar = 1), col = "blue")
#We can also plot the predicted vs observed values to see that the data take a similar shape, regardless of the transformation used
Counts.exponentialRaw <- predict(mod_raw,list(TimeNRG = values1, cvArea = values2))
Counts.exponentialAsin <- predict(mod_asin,list(TimeNRG = values1, cvArea = values2))
Counts.exponentialLogit <- predict(mod_logit,list(TimeNRG = values1, cvArea = values2))
plot(Counts.exponentialRaw, dat$DiversityC, main = "", xlab = "Predicted", ylab = "Observed")
summary(lm(Counts.exponentialRaw~ dat$DiversityC))
abline(0,1)
plot(Counts.exponentialAsin, asin(sqrt(dat$DiversityC)), main = "", xlab = "Predicted", ylab = "Observed")
summary(lm(Counts.exponentialAsin ~ asin(sqrt(dat$DiversityC))))
abline(0,1)
plot(Counts.exponentialLogit, car::logit(dat$DiversityC), main = "", xlab = "Predicted", ylab = "Observed")
summary(lm(Counts.exponentialLogit ~ car::logit(dat$DiversityC)))
abline(0,1)
####### End ######
dev.off()