# goldingn/pseudo_r2_is_bad.R

## pseudo_r2_is_bad.R
# demonstrating how bad an estimate of model goodness of fit pseudo R2 is with small integer data

# Simulate data from a known ("true") Poisson GLM with a single covariate.
# Fixing the seed makes the simulated data reproducible.
set.seed(1)
n <- 1000
x <- rnorm(n = n)

# Intercept on the log scale; it controls the average Poisson rate.
# The lower the rates, the worse the pseudo R-squared says the model is.
# Try changing the intercept to see the effect:
#   with 10: pr2 = 0.998
#   with 0:  pr2 = 0.119
#   with -2: pr2 = 0.0618
intercept <- -2

# The "true" rates, i.e. the rates of the perfect model.
# The true model should have a high pseudo R2, right?
lambda <- exp(intercept + x / 2)

# Draw the observed counts from the true model.
y <- rpois(n = n, lambda = lambda)

# Minus twice the Poisson log-likelihood of the observed counts, given
# predicted rates.
#
# Note: this is -2 * log-likelihood, not the residual deviance reported by
# glm(), because the saturated-model term is not subtracted.
#
# pred: Poisson rate(s), passed to dpois() as `lambda`; either one value per
#       observation or a single value that is recycled across `obs`.
# obs:  observed counts; defaults to the global `y`, so existing
#       one-argument calls `dev(pred)` behave exactly as before.
# Returns a single number.
dev <- function(pred, obs = y) {
  -2 * sum(dpois(obs, pred, log = TRUE))
}

# Score the true model against the intercept-only (null) model, whose single
# fitted rate is the sample mean of the counts.
dev_true <- dev(lambda)
dev_null <- dev(mean(y))

# Because dev() is -2 * log-likelihood, the -2 cancels and this equals
# 1 - logLik(true) / logLik(null), i.e. the form of McFadden's pseudo
# R-squared. The outer parentheses make the assignment visible so the value
# auto-prints at the console.
(pseudo_r2 <- 1 - dev_true / dev_null)

# :O
	# demonstrating how bad an estimate of model goodness of fit pseudo R2 is with small integer data

	# NOTE(review): this indented section repeats the script above line for
	# line; it looks like a paste/extraction duplicate - confirm before removing.
	#
	# Simulate data from a known ("true") Poisson GLM with a single covariate.
	# Fixing the seed makes the simulated data reproducible.
	set.seed(1)
	n <- 1000
	x <- rnorm(n = n)

	# Intercept on the log scale; it controls the average Poisson rate.
	# The lower the rates, the worse the pseudo R-squared says the model is.
	# Try changing the intercept to see the effect:
	#   with 10: pr2 = 0.998
	#   with 0:  pr2 = 0.119
	#   with -2: pr2 = 0.0618
	intercept <- -2

	# The "true" rates, i.e. the rates of the perfect model.
	# The true model should have a high pseudo R2, right?
	lambda <- exp(intercept + x / 2)

	# Draw the observed counts from the true model.
	y <- rpois(n = n, lambda = lambda)

	# Minus twice the Poisson log-likelihood of the observed counts, given
	# predicted rates.
	#
	# Note: this is -2 * log-likelihood, not the residual deviance reported by
	# glm(), because the saturated-model term is not subtracted.
	#
	# pred: Poisson rate(s), passed to dpois() as `lambda`; either one value per
	#       observation or a single value that is recycled across `obs`.
	# obs:  observed counts; defaults to the global `y`, so existing
	#       one-argument calls `dev(pred)` behave exactly as before.
	# Returns a single number.
	dev <- function(pred, obs = y) {
	  -2 * sum(dpois(obs, pred, log = TRUE))
	}

	# Score the true model against the intercept-only (null) model, whose single
	# fitted rate is the sample mean of the counts.
	dev_true <- dev(lambda)
	dev_null <- dev(mean(y))

	# Because dev() is -2 * log-likelihood, the -2 cancels and this equals
	# 1 - logLik(true) / logLik(null), i.e. the form of McFadden's pseudo
	# R-squared. The outer parentheses make the assignment visible so the value
	# auto-prints at the console.
	(pseudo_r2 <- 1 - dev_true / dev_null)

	# :O