Skip to content

Instantly share code, notes, and snippets.

@smc77
Created November 4, 2011 02:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save smc77/1338503 to your computer and use it in GitHub Desktop.
Save smc77/1338503 to your computer and use it in GitHub Desktop.
Overfitting
library(PolynomF)
# Simulate n noisy observations of one period of a sine wave on [0, 1].
n <- 10
f <- function(x) sin(2 * pi * x)
x <- seq(0, 1, length.out = n)
# The response is named `d` because the later sections of this script (the
# per-order fits and the R^2 comparison) all refer to the data by that name.
d <- f(x) + rnorm(n, sd = 0.2)
plot(data.frame(x, d))
curve(f, type = "l", col = "green", add = TRUE)

# A cubic can be fitted by writing out each polynomial term with I() ...
fit <- lm(d ~ x + I(x^2) + I(x^3))
# ... but poly() is easier; raw = TRUE uses plain powers of x, so the
# coefficients are the same as for the explicit formula above.
fit <- lm(d ~ poly(x, 3, raw = TRUE))
summary(fit)

# We can predict these values using the coefficients directly: each row of
# the design matrix (1, x, x^2, x^3) times the coefficient vector.
x.data <- cbind(1, x, x^2, x^3)
y.pred <- drop(x.data %*% coef(fit))
# Or we can just use the predict function to do the same thing
y.pred <- predict(fit)
points(data.frame(x, y.pred), col = "red")
# Draw the data, the true curve (green) and the fitted polynomial (red) for
# four polynomial orders, one panel each on a 2 x 2 grid.
par(mfrow = c(2, 2))
for (i in c(1, 3, 6, 9)) {
  plot(data.frame(x, d), xlab = paste("polynomial fit order", i), ylab = "f(x)")
  curve(f, type = "l", col = "green", add = TRUE)
  fit <- lm(d ~ poly(x, i, raw = TRUE))
  # lm() returns NA for a term it drops as aliased (numerically collinear
  # raw powers at high order). Such a term contributes nothing to the fit, so
  # use 0 for it rather than passing NA on to the polynomial constructor.
  beta <- coef(fit)
  beta[is.na(beta)] <- 0
  # polynom() turns the coefficient vector into a function of x for curve().
  p <- polynom(beta)
  curve(p, col = "red", add = TRUE)
}
# R^2 of the raw polynomial fit of d on x for every order from 1 to 9.
# Built in one pass instead of rbind()-ing onto matrix(ncol = 2), which
# started the table with an all-NA row and copied it on every iteration.
orders <- 1:9
r.squared <- vapply(
  orders,
  function(i) summary(lm(d ~ poly(x, i, raw = TRUE)))[["r.squared"]],
  numeric(1)
)
fit.values <- cbind(orders, r.squared)
colnames(fit.values) <- c("Polynomial Order", "R^2")
# Use a single full-size panel; otherwise the curve is drawn in one quarter
# of the device when a multi-panel layout is still active.
par(mfrow = c(1, 1))
plot(fit.values, type = "l")
# Adding more data: refit the same order-9 polynomial on increasingly large
# samples, one panel per sample size, with the true curve in blue and the
# fitted polynomial in red.
# Note: this overwrites the global x and d with each new sample.
par(mfrow = c(2, 2))
for (i in c(10, 50, 100, 1000)) {
  x <- seq(0, 1, length.out = i)
  d <- f(x) + rnorm(i, sd = 0.15)
  plot(data.frame(x, d), xlab = paste("Data size:", i), ylab = "f(x)")
  curve(f, type = "l", col = "blue", add = TRUE)
  fit <- lm(d ~ poly(x, 9, raw = TRUE))
  # Values are not auto-printed inside a for loop, so the R^2 has to be
  # printed explicitly to be seen.
  print(summary(fit)[["r.squared"]])
  # lm() returns NA for a term it drops as aliased; such a term contributes
  # nothing to the fit, so use 0 for it when building the polynomial.
  beta <- coef(fit)
  beta[is.na(beta)] <- 0
  p <- polynom(beta)
  curve(p, col = "red", add = TRUE)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment