Skip to content

Instantly share code, notes, and snippets.

@tslumley
Last active December 17, 2017 14:34
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tslumley/2e74cd0ac12a671d2724 to your computer and use it in GitHub Desktop.
Save tslumley/2e74cd0ac12a671d2724 to your computer and use it in GitHub Desktop.
predictive margins like SUDAAN does them

Predictive margins for generalised linear models in R

This code implements the thing that PREDMARG in SUDAAN does, as described in

  • Graubard B, Korn E (1999) "Predictive Margins with Survey Data" Biometrics 55:652-659
  • Bieler, Brown, Williams, & Brogan (2010) "Estimating Model-Adjusted Risks, Risk Differences, and Risk Ratios From Complex Survey Data" Am J Epi DOI: 10.1093/aje/kwp440
library(survey)
library(haven)
## http://www.rti.org/sudaan/pdf_files/110Example/samadulted.zip, then unzip
d<-read_sas("~/Downloads/samadulted.sas7bdat")
d$educ_3<-with(d, cut(EDUC1, c(-Inf,14,15,21)))
d$age23_3<-with(d, cut(AGE_P, c(24,44,64,Inf)))
d$marry_3<-with(d, ifelse(R_MARITL %in% c(1,2,3), 1, ifelse(R_MARITL %in% 4,2, ifelse(R_MARITL %in% 5:8,3,NA))))
d$cantafmeds<-with(d, ifelse(AHCAFYR1==1,1,ifelse(AHCAFYR1 ==2,0,NA)))
d$educ_3<-relevel(d$educ_3,ref="(15,21]")
d$age23_3<-relevel(d$age23_3,ref="(64,Inf]")
d$sex<-factor(d$SEX,labels=c("Male","Female"))
d$marry_3<-factor(d$marry_3,labels=c("Married","Widowed","Unmarried"))
## design
nhis<-svydesign(~PSU_P,strata=~STRAT_P, weights=~WTFA_SA,data=d,nest=TRUE)
## model, to check against http://www.rti.org/sudaan/pdf_files/110Example/Logistic%20Example%206.pdf
model3<-svyglm(cantafmeds~sex+age23_3+educ_3+factor(REGION)+marry_3*sex,design=subset(nhis,AGE_P>24 & MRACRPI2==1),family=quasibinomial)
summary(model3)
## adjustment model only: leaves out sex, marry_3
model3a<-svyglm(cantafmeds~age23_3+educ_3+factor(REGION),design=subset(nhis,AGE_P>24 & MRACRPI2==1),family=quasibinomial)
## marginal means
predmarg<-svypredmeans(model3a, ~interaction(sex,marry_3))
predmarg
## contrasts of marginal means, with standard errors
svycontrast(predmarg,quote(Female.Married/Male.Married))
svycontrast(predmarg,quote(Male.Widowed/Male.Married))
svycontrast(predmarg,quote(Male.Unmarried/Male.Married))
## confidence intervals for relative risks: doesn't match
confint(svycontrast(predmarg,quote(Male.Unmarried/Male.Married)))
## because SUDAAN does it on a log scale
exp(confint(svycontrast(predmarg,quote(log(Male.Unmarried/Male.Married)))))
> source("~/Desktop/predmarg-example.R",echo=TRUE)
> library(survey)
> library(haven)
> ## http://www.rti.org/sudaan/pdf_files/110Example/samadulted.zip, then unzip
> d<-read_sas("~/Downloads/samadulted.sas7bdat")
> d$educ_3<-with(d, cut(EDUC1, c(-Inf,14,15,21)))
> d$age23_3<-with(d, cut(AGE_P, c(24,44,64,Inf)))
> d$marry_3<-with(d, ifelse(R_MARITL %in% c(1,2,3), 1, ifelse(R_MARITL %in% 4,2, ifelse(R_MARITL %in% 5:8,3,NA))))
> d$cantafmeds<-with(d, ifelse(AHCAFYR1==1,1,ifelse(AHCAFYR1 ==2,0,NA)))
> d$educ_3<-relevel(d$educ_3,ref="(15,21]")
> d$age23_3<-relevel(d$age23_3,ref="(64,Inf]")
> d$sex<-factor(d$SEX,labels=c("Male","Female"))
> d$marry_3<-factor(d$marry_3,labels=c("Married","Widowed","Unmarried"))
> ## design
> nhis<-svydesign(~PSU_P,strata=~STRAT_P, weights=~WTFA_SA,data=d,nest=TRUE)
> ## model, to check against http://www.rti.org/sudaan/pdf_files/110Example/Logistic%20Example%206.pdf
> model3<-svyglm(cantafmeds~sex+age23_3+educ_3+ .... [TRUNCATED]
> summary(model3)
Call:
svyglm(formula = cantafmeds ~ sex + age23_3 + educ_3 + factor(REGION) +
marry_3 * sex, design = subset(nhis, AGE_P > 24 & MRACRPI2 ==
1), family = quasibinomial)
Survey design:
subset(nhis, AGE_P > 24 & MRACRPI2 == 1)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.04637 0.20310 -24.847 < 2e-16 ***
sexFemale 0.39113 0.11194 3.494 0.000552 ***
age23_3(24,44] 1.26534 0.15113 8.372 2.61e-15 ***
age23_3(44,64] 1.18223 0.14221 8.313 3.90e-15 ***
educ_3(-Inf,14] 0.89694 0.08081 11.099 < 2e-16 ***
educ_3(14,15] 0.88819 0.10274 8.645 4.00e-16 ***
factor(REGION)2 0.34526 0.12712 2.716 0.007012 **
factor(REGION)3 0.50505 0.12615 4.004 7.97e-05 ***
factor(REGION)4 0.35626 0.13812 2.579 0.010400 *
marry_3Widowed 0.74944 0.32832 2.283 0.023189 *
marry_3Unmarried 0.61621 0.11668 5.281 2.56e-07 ***
sexFemale:marry_3Widowed -0.48401 0.35277 -1.372 0.171140
sexFemale:marry_3Unmarried 0.32104 0.14181 2.264 0.024337 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for quasibinomial family taken to be 0.9960176)
Number of Fisher Scoring iterations: 6
> ## adjustment model only: leaves out sex, marry_3
> model3a<-svyglm(cantafmeds~age23_3+educ_3+factor(REGION),design=subset(nhis,AGE_P>24 & MRACRPI2= .... [TRUNCATED]
> ## marginal means
> predmarg<-svypredmeans(model3a, ~interaction(sex,marry_3))
> predmarg
mean SE
Female.Married 0.068405 0.0041
Male.Married 0.047594 0.0041
Male.Unmarried 0.083853 0.0064
Female.Unmarried 0.154317 0.0073
Male.Widowed 0.094393 0.0264
Female.Widowed 0.086931 0.0134
> ## contrasts of marginal means, with standard errors
> svycontrast(predmarg,quote(Female.Married/Male.Married))
nlcon SE
contrast 1.4373 0.1497
> svycontrast(predmarg,quote(Male.Widowed/Male.Married))
nlcon SE
contrast 1.9833 0.5769
> svycontrast(predmarg,quote(Male.Unmarried/Male.Married))
nlcon SE
contrast 1.7618 0.1891
> ## confidence intervals for relative risks: doesn't match
> confint(svycontrast(predmarg,quote(Male.Unmarried/Male.Married)))
2.5 % 97.5 %
contrast 1.391145 2.132516
> ## because SUDAAN does it on a log scale
> exp(confint(svycontrast(predmarg,quote(log(Male.Unmarried/Male.Married)))))
2.5 % 97.5 %
contrast 1.427544 2.174396
>
svypredmeans<-function(adjustmodel, groupfactor){
design<-eval(bquote(update(adjustmodel$survey.design, .groupfactor=.(groupfactor[[2]]))))
groups<-unique(model.frame(design)$.groupfactor)
groups<-groups[!is.na(groups)]
model<-update(adjustmodel, .~.+.groupfactor,design=design)
w<-weights(design,"sampling")
fits<-matrix(nrow=NROW(design),ncol=length(groups))
dg_deta<-matrix(nrow=length(coef(model)),ncol=length(groups))
for(i in 1:length(groups)){
mf<-model.frame(design)
mf$.groupfactor<-groups[i]
mu<-predict(model,newdata=mf,type="response",se.fit=FALSE)
eta<-predict(model,newdata=mf,type="link",se.fit=FALSE)
fits[,i]<-coef(mu)
mm<-model.matrix(terms(model),mf)
dg_deta[,i]<-t(colSums(w*model$family$mu.eta(eta)*mm))/sum(w)
}
colnames(fits)<-as.character(groups)
cond<-svymean(fits,design)
addvar<-t(dg_deta)%*%vcov(model)%*%dg_deta
vv<-addvar+attr(cond,"var")
attr(vv,"parts")<-list(addvar,attr(cond,"var"))
attr(cond,"var")<-vv
cond
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment