Created
May 6, 2015 13:49
-
-
Save logworthy/5eba79ebd7669a16861e to your computer and use it in GitHub Desktop.
Simulation of how simple logistic regresion can fail to model binomial response when the underlying function is more complicated.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simulation of how simple logistic regression can fail to model | |
# binomial response when the underlying function is more complicated | |
install.packages(c('data.table', 'plyr', 'ggplot2', 'drc')) | |
library(data.table) | |
library(plyr) | |
library(ggplot2) | |
library(drc) | |
# Generalised logistic function | |
logistic_function <- function(x) { | |
0.25+0.5/(1+exp(-(1*(x-12)+0.1))) | |
} | |
# Range to explore function | |
numeric_variable <- 1:20 | |
# Conduct sampling | |
sample_frame <- adply( | |
.data=numeric_variable | |
, .margins=1 | |
, .id=NULL | |
, .fun=function(x) { | |
p <- logistic_function(x) | |
data.frame( | |
outcome=sample(c(0,1),100,replace=TRUE,prob=(c(1-p, p))) | |
, numeric_variable=x | |
) | |
} | |
) | |
# Calculate conversion at each level of numeric variable | |
sample_summary <- data.table(sample_frame)[ | |
,list( | |
outcome=sum(outcome)/.N | |
, N=.N | |
) | |
,keyby=numeric_variable | |
] | |
# Actual generating function | |
sample_summary[,'actual_function'] <- logistic_function(numeric_variable) | |
# 2-parameter logistic regression | |
sample_summary[,'logistic_prediction_2p'] <- predict( | |
glm(outcome ~ numeric_variable, data=sample_frame, family="binomial") | |
, data.frame(numeric_variable=numeric_variable) | |
, type="response" | |
) | |
# 4-parameter logistic regression | |
sample_summary[,'logistic_prediction_4p'] <- predict( | |
drm(outcome ~ numeric_variable, weights=N, fct=LL.4(), data=sample_summary, type="binomial") | |
, data.frame(numeric_variable=numeric_variable) | |
) | |
# Comparison plot | |
ggplot( | |
data=sample_frame | |
, aes(x=numeric_variable, y=outcome) | |
)+ | |
geom_point(position='jitter')+ | |
geom_line(data=sample_summary, aes(color="Observed Conversion"), size=1)+ | |
geom_line(data=sample_summary, aes(y=logistic_prediction_2p, color="2-Parameter Logistic"), size=1)+ | |
geom_line(data=sample_summary, aes(y=logistic_prediction_4p, color="4-Parameter Logistic"), size=1)+ | |
geom_line(data=sample_summary, aes(y=actual_function, color="Generating Function"), size=1)+ | |
scale_y_continuous("Outcome")+ | |
scale_x_continuous("Numeric Variable")+ | |
guides(color=guide_legend("Models")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment