BioSciEconomist/MEFF and Odds Ratios.r

## MEFF and Odds Ratios.r
# r code to support post: http://econometricsense.blogspot.com/2016/05/divide-by-4-rule-for-marginal-effects.html

#------------------------------------------------------------------
 # PROGRAM NAME: MEFF and Odds Ratios
 # DATE: 3/3/16
 # CREATED BY: MATT BOGARD
 # PROJECT FILE:
 #------------------------------------------------------------------
 # PURPOSE: GENERATE MARGINAL EFFECTS FOR LOGISTIC REGRESSION AND COMPARE TO:
 # ODDS RATIOS / RESULTS FROM R
 #
 # REFERENCES: https://statcompute.wordpress.com/2012/09/30/marginal-effects-on-binary-outcome/
 #             https://diffuseprior.wordpress.com/2012/04/23/probitlogit-marginal-effects-in-r-2/
 #
 # UCD CENTRE FOR ECONOMIC RESEARCH
 #  WORKING PAPER SERIES
 # 2011
 # Simple Logit and Probit Marginal Effects in R
 # Alan Fernihough, University College Dublin
 # WP11/22
 # October 2011
 # http://www.ucd.ie/t4cms/WP11_22.pdf
 #------------------------------------------------------------------;


#-----------------------------------------------------
#   generate data for continuous explantory variable
#----------------------------------------------------


Input = ("participate age
1 25
1 26
1 27
1 28
1 29
1 30
0 31
1 32
1 33
0 34
1 35
1 36
1 37
1 38
0 39
0 40
0 41
1 42
1 43
0 44
0 45
1 46
1 47
0 48
0 49
0 50
0 51
0 52
1 53
0 54
")

dat1 <-  read.table(textConnection(Input),header=TRUE)

summary(dat1) # summary stats

#### run logistic regression model

mylogit <- glm(participate ~ age, data = dat1, family = "binomial")
summary(mylogit)

exp(cbind(OR = coef(mylogit), confint(mylogit))) # get odds ratios

#-------------------------------------------------
| marginal effects calculations
#-------------------------------------------------


#--------------------------------------------------------------------
# mfx function for maginal effects from a glm model
#
# from: https://diffuseprior.wordpress.com/2012/04/23/probitlogit-marginal-effects-in-r-2/
# based on:
# UCD CENTRE FOR ECONOMIC RESEARCH
# WORKING PAPER SERIES
# 2011
# Simple Logit and Probit Marginal Effects in R
# Alan Fernihough, University College Dublin
# WP11/22
#October 2011
# http://www.ucd.ie/t4cms/WP11_22.pdf
#---------------------------------------------------------------------

mfx <- function(x,sims=1000){
set.seed(1984)
pdf <- ifelse(as.character(x$call)[3]=="binomial(link = \"probit\")",
mean(dnorm(predict(x, type = "link"))),
mean(dlogis(predict(x, type = "link"))))
pdfsd <- ifelse(as.character(x$call)[3]=="binomial(link = \"probit\")",
sd(dnorm(predict(x, type = "link"))),
sd(dlogis(predict(x, type = "link"))))
marginal.effects <- pdf*coef(x)
sim <- matrix(rep(NA,sims*length(coef(x))), nrow=sims)
for(i in 1:length(coef(x))){
sim[,i] <- rnorm(sims,coef(x)[i],diag(vcov(x)^0.5)[i])
}
pdfsim <- rnorm(sims,pdf,pdfsd)
sim.se <- pdfsim*sim
res <- cbind(marginal.effects,sd(sim.se))
colnames(res)[2] <- "standard.error"
ifelse(names(x$coefficients[1])=="(Intercept)",
return(res[2:nrow(res),]),return(res))
}

# marginal effects from logit
mfx(mylogit)


### code it yourself for marginal effects at the mean

summary(dat1)

b0 <-  5.92972   # estimated intercept from logit
b1 <-  -0.14099  # estimated b from logit

xvar <- 39.5   # reference value (i.e. mean)for explanatory variable
d <- .0001     # incremental change in x

xbi <- (xvar + d)*b1 + b0
xbj <- (xvar - d)*b1 + b0
meff <- ((exp(xbi)/(1+exp(xbi)))-(exp(xbj)/(1+exp(xbj))))/(d*2) ;
print(meff)


### a different perhaps easier formulation for me at the mean

XB <- xvar*b1 + b0 # this could be expanded for multiple b's or x's
meffx <- (exp(XB)/((1+exp(XB))^2))*b1
print(meffx)


### averaging the meff for the whole data set

dat1$XB <- dat1$age*b1 + b0

meffx <- (exp(dat1$XB)/((1+exp(dat1$XB))^2))*b1
summary(meffx) # get mean

#### marginal effects from linear model

lpm <- lm(dat1$participate~dat1$age)
summary(lpm)

#---------------------------------------------------
#
#
# multivariable case
#
#
#---------------------------------------------------

dat2 <- read.csv("http://www.ats.ucla.edu/stat/data/binary.csv")

head(dat2)

summary(dat2) # summary stats

#### run logistic regression model

mylogit <- glm(admit ~ gre + gpa, data = dat2, family = "binomial")
summary(mylogit)

exp(cbind(OR = coef(mylogit), confint(mylogit))) # get odds ratios

# marginal effects from logit
mfx(mylogit)


### code it yourself for marginal effects at the mean

summary(dat1)

b0 <-  -4.949378    # estimated intercept from logit
b1 <-  0.002691     # estimated b for gre
b2 <-   0.754687    # estimated b for gpa

x1 <- 587    # reference value (i.e. mean)for gre
x2 <- 3.39   # reference value (i.e. mean)for gre
d <- .0001   # incremental change in x

# meff at means for gre
xbi <- (x1 + d)*b1 + b2*x2 + b0
xbj <- (x1 - d)*b1 + b2*x2 + b0
meff <- ((exp(xbi)/(1+exp(xbi)))-(exp(xbj)/(1+exp(xbj))))/(d*2) ;
print(meff)

# meff at means for gpa
xbi <- (x2 + d)*b2 + b1*x1 + b0
xbj <- (x2 - d)*b2 + b1*x1 + b0
meff <- ((exp(xbi)/(1+exp(xbi)))-(exp(xbj)/(1+exp(xbj))))/(d*2) ;
print(meff)


### a different perhaps easier formulation for me at the mean

XB <- x1*b1 +x2*b2 + b0 # this could be expanded for multiple b's or x's

# meff at means for gre
meffx <- (exp(XB)/((1+exp(XB))^2))*b1
print(meffx)

# meff at means for gpa
meffx <- (exp(XB)/((1+exp(XB))^2))*b2
print(meffx)

### averaging the meff for the whole data set

dat2$XB <- dat2$gre*b1 + dat2$gpa*b2 + b0

# sample avg meff for gre
meffx <- (exp(dat1$XB)/((1+exp(dat1$XB))^2))*b1
summary(meffx) # get mean

# sample avg meff for gpa
meffx <- (exp(dat1$XB)/((1+exp(dat1$XB))^2))*b2
summary(meffx) # get mean

#### marginal effects from linear model

lpm <- lm(admit ~ gre + gpa, data = dat2)
summary(lpm)
	# r code to support post: http://econometricsense.blogspot.com/2016/05/divide-by-4-rule-for-marginal-effects.html

	#------------------------------------------------------------------
	# PROGRAM NAME: MEFF and Odds Ratios
	# DATE: 3/3/16
	# CREATED BY: MATT BOGARD
	# PROJECT FILE:
	#------------------------------------------------------------------
	# PURPOSE: GENERATE MARGINAL EFFECTS FOR LOGISTIC REGRESSION AND COMPARE TO:
	# ODDS RATIOS / RESULTS FROM R
	#
	# REFERENCES: https://statcompute.wordpress.com/2012/09/30/marginal-effects-on-binary-outcome/
	# https://diffuseprior.wordpress.com/2012/04/23/probitlogit-marginal-effects-in-r-2/
	#
	# UCD CENTRE FOR ECONOMIC RESEARCH
	# WORKING PAPER SERIES
	# 2011
	# Simple Logit and Probit Marginal Effects in R
	# Alan Fernihough, University College Dublin
	# WP11/22
	# October 2011
	# http://www.ucd.ie/t4cms/WP11_22.pdf
	#------------------------------------------------------------------;


	#-----------------------------------------------------
	# generate data for continuous explantory variable
	#----------------------------------------------------


	Input = ("participate age
	1 25
	1 26
	1 27
	1 28
	1 29
	1 30
	0 31
	1 32
	1 33
	0 34
	1 35
	1 36
	1 37
	1 38
	0 39
	0 40
	0 41
	1 42
	1 43
	0 44
	0 45
	1 46
	1 47
	0 48
	0 49
	0 50
	0 51
	0 52
	1 53
	0 54
	")

	dat1 <- read.table(textConnection(Input),header=TRUE)

	summary(dat1) # summary stats

	#### run logistic regression model

	mylogit <- glm(participate ~ age, data = dat1, family = "binomial")
	summary(mylogit)

	exp(cbind(OR = coef(mylogit), confint(mylogit))) # get odds ratios

	#-------------------------------------------------
	\| marginal effects calculations
	#-------------------------------------------------


	#--------------------------------------------------------------------
	# mfx function for maginal effects from a glm model
	#
	# from: https://diffuseprior.wordpress.com/2012/04/23/probitlogit-marginal-effects-in-r-2/
	# based on:
	# UCD CENTRE FOR ECONOMIC RESEARCH
	# WORKING PAPER SERIES
	# 2011
	# Simple Logit and Probit Marginal Effects in R
	# Alan Fernihough, University College Dublin
	# WP11/22
	#October 2011
	# http://www.ucd.ie/t4cms/WP11_22.pdf
	#---------------------------------------------------------------------

	mfx <- function(x,sims=1000){
	set.seed(1984)
	pdf <- ifelse(as.character(x$call)[3]=="binomial(link = \"probit\")",
	mean(dnorm(predict(x, type = "link"))),
	mean(dlogis(predict(x, type = "link"))))
	pdfsd <- ifelse(as.character(x$call)[3]=="binomial(link = \"probit\")",
	sd(dnorm(predict(x, type = "link"))),
	sd(dlogis(predict(x, type = "link"))))
	marginal.effects <- pdf*coef(x)
	sim <- matrix(rep(NA,sims*length(coef(x))), nrow=sims)
	for(i in 1:length(coef(x))){
	sim[,i] <- rnorm(sims,coef(x)[i],diag(vcov(x)^0.5)[i])
	}
	pdfsim <- rnorm(sims,pdf,pdfsd)
	sim.se <- pdfsim*sim
	res <- cbind(marginal.effects,sd(sim.se))
	colnames(res)[2] <- "standard.error"
	ifelse(names(x$coefficients[1])=="(Intercept)",
	return(res[2:nrow(res),]),return(res))
	}

	# marginal effects from logit
	mfx(mylogit)


	### code it yourself for marginal effects at the mean

	summary(dat1)

	b0 <- 5.92972 # estimated intercept from logit
	b1 <- -0.14099 # estimated b from logit

	xvar <- 39.5 # reference value (i.e. mean)for explanatory variable
	d <- .0001 # incremental change in x

	xbi <- (xvar + d)*b1 + b0
	xbj <- (xvar - d)*b1 + b0
	meff <- ((exp(xbi)/(1+exp(xbi)))-(exp(xbj)/(1+exp(xbj))))/(d*2) ;
	print(meff)


	### a different perhaps easier formulation for me at the mean

	XB <- xvar*b1 + b0 # this could be expanded for multiple b's or x's
	meffx <- (exp(XB)/((1+exp(XB))^2))*b1
	print(meffx)


	### averaging the meff for the whole data set

	dat1$XB <- dat1$age*b1 + b0

	meffx <- (exp(dat1$XB)/((1+exp(dat1$XB))^2))*b1
	summary(meffx) # get mean

	#### marginal effects from linear model

	lpm <- lm(dat1$participate~dat1$age)
	summary(lpm)

	#---------------------------------------------------
	#
	#
	# multivariable case
	#
	#
	#---------------------------------------------------

	dat2 <- read.csv("http://www.ats.ucla.edu/stat/data/binary.csv")

	head(dat2)

	summary(dat2) # summary stats

	#### run logistic regression model

	mylogit <- glm(admit ~ gre + gpa, data = dat2, family = "binomial")
	summary(mylogit)

	exp(cbind(OR = coef(mylogit), confint(mylogit))) # get odds ratios

	# marginal effects from logit
	mfx(mylogit)


	### code it yourself for marginal effects at the mean

	summary(dat1)

	b0 <- -4.949378 # estimated intercept from logit
	b1 <- 0.002691 # estimated b for gre
	b2 <- 0.754687 # estimated b for gpa

	x1 <- 587 # reference value (i.e. mean)for gre
	x2 <- 3.39 # reference value (i.e. mean)for gre
	d <- .0001 # incremental change in x

	# meff at means for gre
	xbi <- (x1 + d)b1 + b2x2 + b0
	xbj <- (x1 - d)b1 + b2x2 + b0
	meff <- ((exp(xbi)/(1+exp(xbi)))-(exp(xbj)/(1+exp(xbj))))/(d*2) ;
	print(meff)

	# meff at means for gpa
	xbi <- (x2 + d)b2 + b1x1 + b0
	xbj <- (x2 - d)b2 + b1x1 + b0
	meff <- ((exp(xbi)/(1+exp(xbi)))-(exp(xbj)/(1+exp(xbj))))/(d*2) ;
	print(meff)


	### a different perhaps easier formulation for me at the mean

	XB <- x1b1 +x2b2 + b0 # this could be expanded for multiple b's or x's

	# meff at means for gre
	meffx <- (exp(XB)/((1+exp(XB))^2))*b1
	print(meffx)

	# meff at means for gpa
	meffx <- (exp(XB)/((1+exp(XB))^2))*b2
	print(meffx)

	### averaging the meff for the whole data set

	dat2$XB <- dat2$greb1 + dat2$gpab2 + b0

	# sample avg meff for gre
	meffx <- (exp(dat1$XB)/((1+exp(dat1$XB))^2))*b1
	summary(meffx) # get mean

	# sample avg meff for gpa
	meffx <- (exp(dat1$XB)/((1+exp(dat1$XB))^2))*b2
	summary(meffx) # get mean

	#### marginal effects from linear model

	lpm <- lm(admit ~ gre + gpa, data = dat2)
	summary(lpm)