Instantly share code, notes, and snippets.

@joshkyh /analysis.r
Last active May 16, 2018

Embed
What would you like to do?
Different fits given the same R-squared
#' Generate a pair of variables with a target correlation
#'
#' Adapted from
#' https://stats.stackexchange.com/questions/15011/generate-a-random-variable-with-a-defined-correlation-to-an-existing-variables/15035#15035
#'
#' Column 1 of the result is `X1` (either `x` or a fresh draw from `mar.fun`).
#' Column 2 is `rho * X1 + sqrt(1 - rho^2) * X2`, where `X2` is a second draw
#' from `mar.fun`. The correlation between the two columns equals `rho` only
#' when `X1` and `X2` are uncorrelated and have the same variance; if `x` is
#' on a different scale than the draws of `mar.fun`, the realised correlation
#' will differ from `rho`.
#'
#' @param n Number of observations to draw.
#' @param rho Target correlation. `chol()` raises an error when the implied
#'   2 x 2 correlation matrix is not positive definite.
#' @param mar.fun Function generating the marginal draws; called as
#'   `mar.fun(n, ...)`.
#' @param x Optional existing variable used unchanged as the first column.
#'   A warning is raised when its length differs from `n`.
#' @param ... Further arguments forwarded to `mar.fun` for every draw, so that
#'   both columns share the same marginal distribution.
#' @return A numeric matrix with two columns (not a data frame).
getBiCop <- function(n, rho, mar.fun = rnorm, x = NULL, ...) {
  # Scalar condition: use the short-circuiting `&&`, not the elementwise `&`.
  if (!is.null(x) && length(x) != n) {
    warning("Variable x does not have the same length as n!")
  }

  X1 <- if (is.null(x)) mar.fun(n, ...) else x
  # Forward `...` here as well; otherwise X2 is drawn with the defaults of
  # mar.fun and its scale no longer matches X1.
  X2 <- mar.fun(n, ...)

  # Upper-triangular Cholesky factor of the target correlation matrix.
  C <- matrix(rho, nrow = 2, ncol = 2)
  diag(C) <- 1
  C <- chol(C)

  # Induce the correlation. The first column of C is c(1, 0), so X1 passes
  # through unchanged.
  cbind(X1, X2) %*% C
}
# Simulate three years of weekly data (52 * 3 rows) with a fixed seed.
set.seed(3)
data <- data.frame(revenue = runif(n = 52 * 3, min = 1000, max = 2000))

# Second column of getBiCop() with revenue held fixed as x, scaled by 10.
data$search.spend <- 10 * getBiCop(n = 52 * 3, rho = 0.0052, x = data$revenue)[, 2]
cor(data)

# Derived columns for the alternative model specifications.
data$log_revenue <- log(data$revenue + 1)
data$log_search.spend <- log(data$search.spend + 1)
data$search.spend_sq <- data$search.spend^2

# Three competing specifications fitted to the same data.
linear.fit <- lm(revenue ~ search.spend, data = data)
log.fit <- lm(log_revenue ~ search.spend, data = data)
quad.fit <- lm(revenue ~ search.spend + search.spend_sq, data = data)
# Axis limits for the plot; x_max also bounds the prediction grid.
x_max <- 600
y_max <- 4000

# Evenly spaced spend values (101 points from 0 to x_max), plus the squared
# term the quadratic model needs.
search.spend.grid <- data.frame(search.spend = seq(from = 0, to = x_max, by = x_max / 100))
search.spend.grid$search.spend_sq <- search.spend.grid$search.spend^2

# Predictions from each model on the revenue scale.
plotdf <- data.frame(search.spend.grid)
plotdf$pred_linear <- predict(linear.fit, search.spend.grid)
# log.fit models log(revenue + 1), so invert with exp(.) - 1 to get revenue.
plotdf$pred_log <- exp(predict(log.fit, search.spend.grid)) - 1
plotdf$pred_quad <- predict(quad.fit, search.spend.grid)
# Scatter plot of the simulated data on axes wide enough to show how the
# fitted curves extrapolate beyond the observed range.
plot(
  data$search.spend, data$revenue,
  xlim = c(0, x_max), ylim = c(0, y_max),
  xlab = "Search Spend",
  ylab = "Revenue"
)

# Overlay each model's predictions; colours match the legend order below.
lines(plotdf$search.spend, plotdf$pred_linear, col = "red", lwd = 2)
lines(plotdf$search.spend, plotdf$pred_log, col = "green", lwd = 2)
lines(plotdf$search.spend, plotdf$pred_quad, col = "purple", lwd = 2)

# Each label reports the R-squared of its own model (the linear and log
# labels previously showed each other's values).
legend(
  "topright",
  legend = c(
    paste("linear model, r_sq =", round(summary(linear.fit)$r.squared, 2)),
    paste("log revenue model, r_sq =", round(summary(log.fit)$r.squared, 2)),
    paste("quadratic model, r_sq =", round(summary(quad.fit)$r.squared, 2))
  ),
  col = c("red", "green", "purple"),
  lwd = 2,
  lty = 1
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment