Last active
May 16, 2018 09:42
-
-
Save joshkyh/9b6f8b67cc537d5cc857719af6b4629e to your computer and use it in GitHub Desktop.
Different fits given the same R-squared
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Generate a pair of variables with a target correlation
#'
#' Adapted from
#' https://stats.stackexchange.com/questions/15011/generate-a-random-variable-with-a-defined-correlation-to-an-existing-variables/15035#15035
#'
#' Builds the 2 x 2 correlation matrix with off-diagonal `rho`, takes its
#' upper-triangular Cholesky factor and right-multiplies the two input columns
#' by it. The first output column is therefore the first input unchanged, and
#' the second is `rho * X1 + sqrt(1 - rho^2) * X2`.
#'
#' NOTE: the second column correlates with the first at `rho` (in the
#' population) only when X1 and X2 are uncorrelated and have equal variance.
#' If `x` is supplied on a different scale than `mar.fun` draws from, the
#' realised correlation will differ from `rho`.
#'
#' @param n Number of observations to draw.
#' @param rho Target correlation. `chol()` needs a positive-definite matrix,
#'   so this must satisfy `abs(rho) < 1`.
#' @param mar.fun Function generating the marginal draws; called as
#'   `mar.fun(n, ...)`.
#' @param x Optional existing variable to use as the first column instead of
#'   drawing it. A warning is raised when its length differs from `n`.
#' @param ... Further arguments forwarded to every `mar.fun` call, so that
#'   both marginals come from the same distribution.
#' @return A numeric matrix (not a data frame) with two columns: the first
#'   variable, and the variable constructed to correlate with it.
getBiCop <- function(n, rho, mar.fun = rnorm, x = NULL, ...) {
  if (is.null(x)) {
    X1 <- mar.fun(n, ...)
  } else {
    if (length(x) != n) {
      warning("Variable x does not have the same length as n!")
    }
    X1 <- x
  }

  # Correlation matrix [[1, rho], [rho, 1]] and its Cholesky factor.
  C <- matrix(rho, nrow = 2, ncol = 2)
  diag(C) <- 1
  C <- chol(C)

  # Forward `...` here as well; otherwise X2 would silently fall back to
  # mar.fun's defaults and no longer match X1's distribution.
  X2 <- mar.fun(n, ...)

  # Induce the correlation (leaves the first column equal to X1).
  cbind(X1, X2) %*% C
}
# Simulate 52 * 3 observations of revenue, uniformly distributed between
# 1000 and 2000, with a fixed seed so every run yields the same data.
set.seed(3)
data <- data.frame(revenue = runif(n = 52 * 3, min = 1000, max = 2000))

# Search spend is the second column returned by getBiCop(), scaled by 10.
# NOTE(review): getBiCop() only attains `rho` when both inputs share the same
# variance; revenue is on a far larger scale than the standard-normal draw, so
# rely on the cor() output below rather than assuming it equals 0.0052.
data[["search.spend"]] <- 10 * getBiCop(
  n = 52 * 3,
  rho = 0.0052,
  x = data[["revenue"]]
)[, 2]

# Show the realised correlation between revenue and search spend.
cor(data)

# Derived columns used by the model fits: log(1 + x) transforms and the
# squared spend term for the quadratic model.
data[["log_revenue"]] <- log(1 + data[["revenue"]])
data[["log_search.spend"]] <- log(1 + data[["search.spend"]])
data[["search.spend_sq"]] <- data[["search.spend"]]^2
# Fit three competing models of revenue against search spend:
#   linear.fit - revenue on spend
#   log.fit    - log(revenue + 1) on spend
#   quad.fit   - revenue on spend and spend squared
linear.fit <- lm(revenue ~ search.spend, data = data)
log.fit <- lm(log_revenue ~ search.spend, data = data)
quad.fit <- lm(revenue ~ search.spend + search.spend_sq, data = data)

# Upper axis limits for the plot; x_max also bounds the prediction grid.
x_max <- 600
y_max <- 4000

# Grid of spend values from 0 to x_max in 100 equal steps, plus the squared
# term that quad.fit needs as a predictor.
search.spend.grid <- data.frame(
  search.spend = seq(from = 0, to = x_max, by = x_max / 100)
)
search.spend.grid$search.spend_sq <- search.spend.grid$search.spend^2

# Predictions of each model over the grid, all on the revenue scale.
plotdf <- data.frame(search.spend.grid)
plotdf$pred_linear <- predict(linear.fit, newdata = search.spend.grid)
# log.fit models log(revenue + 1), so the inverse transform is exp(y) - 1;
# plain exp() would overstate every prediction by 1.
plotdf$pred_log <- expm1(predict(log.fit, newdata = search.spend.grid))
plotdf$pred_quad <- predict(quad.fit, newdata = search.spend.grid)
# Scatter plot of the observed data. The axes extend well beyond the observed
# range so the three fitted curves can be compared as they are extrapolated.
plot(
  data$search.spend, data$revenue,
  xlim = c(0, x_max), ylim = c(0, y_max),
  xlab = "Search Spend",
  ylab = "Revenue"
)

# Overlay each model's predictions across the grid.
lines(
  plotdf$search.spend, plotdf$pred_linear,
  type = "l", col = "red", lwd = 2
)
lines(
  plotdf$search.spend, plotdf$pred_log,
  type = "l", col = "green", lwd = 2
)
lines(
  plotdf$search.spend, plotdf$pred_quad,
  type = "l", col = "purple", lwd = 2
)

# Legend entries are listed in the same order as the colours, and each label
# reports the R-squared of the model it names (the linear and log labels
# previously showed each other's value).
# NOTE(review): log.fit's R-squared is computed on the log(revenue + 1) scale,
# so it is not strictly comparable with the other two.
legend(
  "topright",
  legend = c(
    paste("linear model, r_sq =", round(summary(linear.fit)$r.squared, 2)),
    paste("log revenue model, r_sq =", round(summary(log.fit)$r.squared, 2)),
    paste("quadratic model, r_sq =", round(summary(quad.fit)$r.squared, 2))
  ),
  col = c("red", "green", "purple"),
  lwd = 2,
  lty = 1
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment