# fditraglia/drop-regressor.R

## drop-regressor.R
# In this example, both x1 and x2 predict y and the two predictors are correlated
# with each other, but x2 has a much larger regression coefficient than x1. In a
# regression with both x1 and x2, both predictors are statistically significant.
# However, if we drop x2, then x1 becomes insignificant. This is because dropping
# x2 dramatically increases the residual standard error. This effect overwhelms the
# increase in the estimated coefficient of x1 that arises from its correlation with
# the omitted variable x2.

# Simulate two weakly correlated regressors, build an outcome that depends on
# both, and compare the full regression with the one that omits x2.
library(MASS)  # provides mvrnorm()
set.seed(1234)  # fix the RNG state so the draws below are reproducible
n <- 100

# Covariance matrix of (x1, x2): unit variances, covariance 0.1.
S <- matrix(c(1, 0.1,
              0.1, 1), nrow = 2, ncol = 2, byrow = TRUE)
X <- mvrnorm(n, mu = c(0, 0), Sigma = S)
x1 <- X[, 1]
x2 <- X[, 2]

# Outcome: the coefficient on x2 is ten times that on x1; errors are N(0, 1).
epsilon <- rnorm(n)
b0 <- 0
b1 <- 0.2
b2 <- 2
y <- b0 + b1 * x1 + b2 * x2 + epsilon

reg1 <- lm(y ~ x1 + x2)  # both regressors
reg2 <- lm(y ~ x1)       # x2 omitted
reg3 <- lm(y ~ x2)       # x1 omitted; fitted but not summarized below

# Print explicitly: top-level values are not auto-printed when this file is run
# through source() with its default settings, so a bare summary() call would
# silently produce no output there.
print(summary(reg1))
print(summary(reg2))
	# In this example, both x1 and x2 predict y and the two predictors are correlated
	# with each other, but x2 has a much larger regression coefficient than x1. In a
	# regression with both x1 and x2, both predictors are statistically significant.
	# However, if we drop x2, then x1 becomes insignificant. This is because dropping
	# x2 dramatically increases the residual standard error. This effect overwhelms the
	# increase in the estimated coefficient of x1 that arises from its correlation with
	# the omitted variable x2.

	# NOTE(review): this block repeats the simulation earlier in the file with
	# the same seed, so it regenerates identical data and fits. It looks like a
	# duplicated copy -- confirm whether it is intended.
	library(MASS)  # provides mvrnorm()
	set.seed(1234)  # reset the RNG state so the draws below are reproducible
	n <- 100

	# Covariance matrix of (x1, x2): unit variances, covariance 0.1.
	S <- matrix(c(1, 0.1, 0.1, 1), nrow = 2, ncol = 2, byrow = TRUE)
	X <- mvrnorm(n, mu = c(0, 0), Sigma = S)
	x1 <- X[, 1]
	x2 <- X[, 2]

	# Outcome: the coefficient on x2 is ten times that on x1; errors are N(0, 1).
	epsilon <- rnorm(n)
	b0 <- 0
	b1 <- 0.2
	b2 <- 2
	y <- b0 + b1 * x1 + b2 * x2 + epsilon

	reg1 <- lm(y ~ x1 + x2)  # both regressors
	reg2 <- lm(y ~ x1)       # x2 omitted
	reg3 <- lm(y ~ x2)       # x1 omitted; fitted but not summarized below

	# Print explicitly: top-level values are not auto-printed when this file is
	# run through source() with its default settings.
	print(summary(reg1))
	print(summary(reg2))