Florian Hartig florianhartig

## typeI-AIC.R
# This example shows how AIC selection, followed by a conventional regression analysis of the selected model, massively inflates false positives. CC BY-NC-SA 4.0 Florian Hartig

set.seed(1)
library(MASS)

dat = data.frame(matrix(runif(20000), ncol = 100))
dat$y = rnorm(200)
fullModel = lm(y ~ . , data = dat)
summary(fullModel)
# 2 predictors out of 100 significant (on average, we expect 5 of 100 to be significant)

## rf-VIPsWithCollinearity.R
# The purpose of this script is to show that RF variable importance
# will split importance values for collinear variables evenly,
# even if collinearity is low enough so that variables are sepearable
# and would be correctly separameted by an lm / ANOVA

set.seed(123)

# simulation parameters
n = 3000
col = 0.7

## Collider bias example.R
n = 1000 # sample size

# generating data according to a collider structure
x1 = runif(n) # random values for explenatory variable x1
y = 1.4 * x1 + rnorm(n,sd = 0.1) # y is causally influences by x1, effect size 0.4
x2 = 2 * x1 + 2 * y + rnorm(n,sd = 0.1) # x2 is a collider, no influence on y, but influenced by y and x1

# ignoring the collider results in the correct regression estimate of approx 1.4
summary(lm(y ~ x1))

## metropolis.R
trueA = 5
trueB = 0
trueSd = 10
sampleSize = 31

# create independent x-values
x =(-(sampleSize-1)/2):((sampleSize-1)/2)
# create dependent values according to ax + b + N(0,sd)
y = trueA * x + trueB + rnorm(n=sampleSize,mean=0,sd=trueSd)

## Hurricanes and Himmicanes revisited with DHARMa
library(gdata)
Data = read.xls("http://www.pnas.org/content/suppl/2014/05/30/1402786111.DCSupplemental/pnas.1402786111.sd01.xlsx",
                nrows = 92, as.is = TRUE)

library(glmmTMB)
originalModelGAM = glmmTMB(alldeaths ~ scale(MasFem) *
                          (scale(Minpressure_Updated.2014) + scale(NDAM)),
                           data = Data, family = nbinom2)

# Residual checks with DHARMa

## CI and PI for regression models
n = 50
x = seq(-1,1, len = n)
f = function(x) 0.3 * x^2
y = f(x) + rnorm(n, sd = 0.2)


par(mfrow = c(1,2))


lmFit <- lm(y ~ x + I(x^2))
	# This example shows how AIC selection, followed by a conventional regression analysis of the selected model, massively inflates false positives. CC BY-NC-SA 4.0 Florian Hartig

	set.seed(1)
	library(MASS)

	dat = data.frame(matrix(runif(20000), ncol = 100))
	dat$y = rnorm(200)
	fullModel = lm(y ~ . , data = dat)
	summary(fullModel)
	# 2 predictors out of 100 significant (on average, we expect 5 of 100 to be significant)
	# The purpose of this script is to show that RF variable importance
	# will split importance values for collinear variables evenly,
	# even if collinearity is low enough so that variables are sepearable
	# and would be correctly separameted by an lm / ANOVA

	set.seed(123)

	# simulation parameters
	n = 3000
	col = 0.7
	n = 1000 # sample size

	# generating data according to a collider structure
	x1 = runif(n) # random values for explenatory variable x1
	y = 1.4 * x1 + rnorm(n,sd = 0.1) # y is causally influences by x1, effect size 0.4
	x2 = 2 * x1 + 2 * y + rnorm(n,sd = 0.1) # x2 is a collider, no influence on y, but influenced by y and x1

	# ignoring the collider results in the correct regression estimate of approx 1.4
	summary(lm(y ~ x1))
	trueA = 5
	trueB = 0
	trueSd = 10
	sampleSize = 31

	# create independent x-values
	x =(-(sampleSize-1)/2):((sampleSize-1)/2)
	# create dependent values according to ax + b + N(0,sd)
	y = trueA * x + trueB + rnorm(n=sampleSize,mean=0,sd=trueSd)
	library(gdata)
	Data = read.xls("http://www.pnas.org/content/suppl/2014/05/30/1402786111.DCSupplemental/pnas.1402786111.sd01.xlsx",
	nrows = 92, as.is = TRUE)

	library(glmmTMB)
	originalModelGAM = glmmTMB(alldeaths ~ scale(MasFem) *
	(scale(Minpressure_Updated.2014) + scale(NDAM)),
	data = Data, family = nbinom2)

	# Residual checks with DHARMa
	n = 50
	x = seq(-1,1, len = n)
	f = function(x) 0.3 * x^2
	y = f(x) + rnorm(n, sd = 0.2)


	par(mfrow = c(1,2))


	lmFit <- lm(y ~ x + I(x^2))