Skip to content

Instantly share code, notes, and snippets.

@logworthy
Created May 6, 2015 13:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save logworthy/5eba79ebd7669a16861e to your computer and use it in GitHub Desktop.
Save logworthy/5eba79ebd7669a16861e to your computer and use it in GitHub Desktop.
Simulation of how simple logistic regresion can fail to model binomial response when the underlying function is more complicated.
# Simulation of how simple logistic regression can fail to model
# binomial response when the underlying function is more complicated
install.packages(c('data.table', 'plyr', 'ggplot2', 'drc'))
library(data.table)
library(plyr)
library(ggplot2)
library(drc)
# Generalised logistic function
logistic_function <- function(x) {
0.25+0.5/(1+exp(-(1*(x-12)+0.1)))
}
# Range to explore function
numeric_variable <- 1:20
# Conduct sampling
sample_frame <- adply(
.data=numeric_variable
, .margins=1
, .id=NULL
, .fun=function(x) {
p <- logistic_function(x)
data.frame(
outcome=sample(c(0,1),100,replace=TRUE,prob=(c(1-p, p)))
, numeric_variable=x
)
}
)
# Calculate conversion at each level of numeric variable
sample_summary <- data.table(sample_frame)[
,list(
outcome=sum(outcome)/.N
, N=.N
)
,keyby=numeric_variable
]
# Actual generating function
sample_summary[,'actual_function'] <- logistic_function(numeric_variable)
# 2-parameter logistic regression
sample_summary[,'logistic_prediction_2p'] <- predict(
glm(outcome ~ numeric_variable, data=sample_frame, family="binomial")
, data.frame(numeric_variable=numeric_variable)
, type="response"
)
# 4-parameter logistic regression
sample_summary[,'logistic_prediction_4p'] <- predict(
drm(outcome ~ numeric_variable, weights=N, fct=LL.4(), data=sample_summary, type="binomial")
, data.frame(numeric_variable=numeric_variable)
)
# Comparison plot
ggplot(
data=sample_frame
, aes(x=numeric_variable, y=outcome)
)+
geom_point(position='jitter')+
geom_line(data=sample_summary, aes(color="Observed Conversion"), size=1)+
geom_line(data=sample_summary, aes(y=logistic_prediction_2p, color="2-Parameter Logistic"), size=1)+
geom_line(data=sample_summary, aes(y=logistic_prediction_4p, color="4-Parameter Logistic"), size=1)+
geom_line(data=sample_summary, aes(y=actual_function, color="Generating Function"), size=1)+
scale_y_continuous("Outcome")+
scale_x_continuous("Numeric Variable")+
guides(color=guide_legend("Models"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment