Florian Hartig florianhartig

## BinomialAnalysisWith-MLE-NHST-Bayes.r
rm(list=ls(all=TRUE))

#Analysis of likelihood, p-value, and Bayes for binomial model,
#10 trials, 3 success, unknown coin, want to do inference

trials = 10
success = 3

# GETTING THE MLE ESTIMATE

## Bayes-Example-HierarchicalModel.r
rm(list=ls(all=TRUE))

library(rjags)

# assuming the data is created from an ecological system that creates an
# exponential size distribution (e.g. you sample individuals from a population that can be
# expected to follow this distribution), but this measurments are done with
# a considerable lognormal observation error
# for a realistic application see http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0058036

## Teaching-Stats-Likelihood-Intro-GER
# Praktische Vorlesung, Einführung Statistik, WS 13/14
# Florian Hartig, http://www.biom.uni-freiburg.de/mitarbeiter/hartig

?read.csv

# Wiederholung likelihood

# Als estes erstellen wir einen leeren Plot

plot(NULL, NULL, xlim=c(-4,6), ylim = c(0,0.5), ylab = "Wahrscheinlichkeit(sdichte)", xlab = "Observed value")

## CorrelationDensityPlotWithIpanel.r
library(IDPmisc)

panel.hist <- function(x, ...)
{
  usr <- par("usr"); on.exit(par(usr))
  par(usr = c(usr[1:2], 0, 1.5) )
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks; nB <- length(breaks)
  y <- h$counts; y <- y/max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col="blue4", ...)

## samplingDesign3SpeciesWithNoEqualNeigbors.r
# plot layout
x=12
y=12
# number of diff samples
n=3

# plot A, which holds
A=array(NA,c(x,y))
# repeat 1 through n as many times as subplots are in A
s=rep(1:n, x*y/n)

## cl-jags.R
## Load libraries
library(mclogit)
library(reshape)
library(rjags)
library(R2WinBUGS) ## for write.model

## Load the data file from mclogit
data(Transport)

## Do the mclogit model

## Checking Bayesian Residuals with DHARMa
############ CREATE ZERO-INFLATED GLMM DATA #################

# This first part creates a dataset with beetles counts across an altitudinal gradient (several plots each observed several years), with a random intercept on year and zero-inflation.

altitude = rep(seq(0,1,len = 50), each = 20)
dataID = 1:1000
spatialCoordinate = rep(seq(0,30, len = 50), each = 20)

# random effects + zeroinflation
plot = rep(1:50, each = 20)

## source_https
# from https://tonybreyal.wordpress.com/2011/11/24/source_https-sourcing-an-r-script-from-github/
# and http://stackoverflow.com/questions/7715723/sourcing-r-script-over-https

source_https <- function(url, ...) {
  # load package
  require(RCurl)

  # parse and evaluate each .R script
  sapply(c(url, ...), function(u) {
    eval(parse(text = getURL(u, followlocation = TRUE, cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))), envir = .GlobalEnv)

## typeI-AIC.R
# This example shows how AIC selection, followed by a conventional regression analysis of the selected model, massively inflates false positives. CC BY-NC-SA 4.0 Florian Hartig

set.seed(1)
library(MASS)

dat = data.frame(matrix(runif(20000), ncol = 100))
dat$y = rnorm(200)
fullModel = lm(y ~ . , data = dat)
summary(fullModel)
# 2 predictors out of 100 significant (on average, we expect 5 of 100 to be significant)

## Collider bias example.R
n = 1000 # sample size

# generating data according to a collider structure
x1 = runif(n) # random values for explenatory variable x1
y = 1.4 * x1 + rnorm(n,sd = 0.1) # y is causally influences by x1, effect size 0.4
x2 = 2 * x1 + 2 * y + rnorm(n,sd = 0.1) # x2 is a collider, no influence on y, but influenced by y and x1

# ignoring the collider results in the correct regression estimate of approx 1.4
summary(lm(y ~ x1))
	rm(list=ls(all=TRUE))

	#Analysis of likelihood, p-value, and Bayes for binomial model,
	#10 trials, 3 success, unknown coin, want to do inference

	trials = 10
	success = 3

	# GETTING THE MLE ESTIMATE
	rm(list=ls(all=TRUE))

	library(rjags)

	# assuming the data is created from an ecological system that creates an
	# exponential size distribution (e.g. you sample individuals from a population that can be
	# expected to follow this distribution), but this measurments are done with
	# a considerable lognormal observation error
	# for a realistic application see http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0058036
	# Praktische Vorlesung, Einführung Statistik, WS 13/14
	# Florian Hartig, http://www.biom.uni-freiburg.de/mitarbeiter/hartig

	?read.csv

	# Wiederholung likelihood

	# Als estes erstellen wir einen leeren Plot

	plot(NULL, NULL, xlim=c(-4,6), ylim = c(0,0.5), ylab = "Wahrscheinlichkeit(sdichte)", xlab = "Observed value")
	library(IDPmisc)

	panel.hist <- function(x, ...)
	{
	usr <- par("usr"); on.exit(par(usr))
	par(usr = c(usr[1:2], 0, 1.5) )
	h <- hist(x, plot = FALSE)
	breaks <- h$breaks; nB <- length(breaks)
	y <- h$counts; y <- y/max(y)
	rect(breaks[-nB], 0, breaks[-1], y, col="blue4", ...)
	# plot layout
	x=12
	y=12
	# number of diff samples
	n=3

	# plot A, which holds
	A=array(NA,c(x,y))
	# repeat 1 through n as many times as subplots are in A
	s=rep(1:n, x*y/n)
	## Load libraries
	library(mclogit)
	library(reshape)
	library(rjags)
	library(R2WinBUGS) ## for write.model

	## Load the data file from mclogit
	data(Transport)

	## Do the mclogit model
	############ CREATE ZERO-INFLATED GLMM DATA #################

	# This first part creates a dataset with beetles counts across an altitudinal gradient (several plots each observed several years), with a random intercept on year and zero-inflation.

	altitude = rep(seq(0,1,len = 50), each = 20)
	dataID = 1:1000
	spatialCoordinate = rep(seq(0,30, len = 50), each = 20)

	# random effects + zeroinflation
	plot = rep(1:50, each = 20)
	# from https://tonybreyal.wordpress.com/2011/11/24/source_https-sourcing-an-r-script-from-github/
	# and http://stackoverflow.com/questions/7715723/sourcing-r-script-over-https

	source_https <- function(url, ...) {
	# load package
	require(RCurl)

	# parse and evaluate each .R script
	sapply(c(url, ...), function(u) {
	eval(parse(text = getURL(u, followlocation = TRUE, cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))), envir = .GlobalEnv)
	# This example shows how AIC selection, followed by a conventional regression analysis of the selected model, massively inflates false positives. CC BY-NC-SA 4.0 Florian Hartig

	set.seed(1)
	library(MASS)

	dat = data.frame(matrix(runif(20000), ncol = 100))
	dat$y = rnorm(200)
	fullModel = lm(y ~ . , data = dat)
	summary(fullModel)
	# 2 predictors out of 100 significant (on average, we expect 5 of 100 to be significant)
	n = 1000 # sample size

	# generating data according to a collider structure
	x1 = runif(n) # random values for explenatory variable x1
	y = 1.4 * x1 + rnorm(n,sd = 0.1) # y is causally influences by x1, effect size 0.4
	x2 = 2 * x1 + 2 * y + rnorm(n,sd = 0.1) # x2 is a collider, no influence on y, but influenced by y and x1

	# ignoring the collider results in the correct regression estimate of approx 1.4
	summary(lm(y ~ x1))