tslumley/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Predictive margins for generalised linear models in R

This code implements the thing that PREDMARG in SUDAAN does, as described in

Graubard B, Korn E (1999) "Predictive Margins with Survey Data" Biometrics 55:652-659
Bieler, Brown, Williams, & Brogan (2010) "Estimating Model-Adjusted Risks, Risk Differences, and Risk Ratios From Complex Survey Data" Am J Epi DOI: 10.1093/aje/kwp440


## example.R
library(survey)
library(haven)
## http://www.rti.org/sudaan/pdf_files/110Example/samadulted.zip, then unzip
d<-read_sas("~/Downloads/samadulted.sas7bdat")
d$educ_3<-with(d, cut(EDUC1, c(-Inf,14,15,21)))
d$age23_3<-with(d, cut(AGE_P, c(24,44,64,Inf)))
d$marry_3<-with(d, ifelse(R_MARITL %in% c(1,2,3), 1, ifelse(R_MARITL %in% 4,2, ifelse(R_MARITL %in% 5:8,3,NA))))
d$cantafmeds<-with(d, ifelse(AHCAFYR1==1,1,ifelse(AHCAFYR1 ==2,0,NA)))

d$educ_3<-relevel(d$educ_3,ref="(15,21]")
d$age23_3<-relevel(d$age23_3,ref="(64,Inf]")
d$sex<-factor(d$SEX,labels=c("Male","Female"))
d$marry_3<-factor(d$marry_3,labels=c("Married","Widowed","Unmarried"))

## design
nhis<-svydesign(~PSU_P,strata=~STRAT_P, weights=~WTFA_SA,data=d,nest=TRUE)

## model, to check against http://www.rti.org/sudaan/pdf_files/110Example/Logistic%20Example%206.pdf
model3<-svyglm(cantafmeds~sex+age23_3+educ_3+factor(REGION)+marry_3*sex,design=subset(nhis,AGE_P>24 & MRACRPI2==1),family=quasibinomial)

summary(model3)

## adjustment model only: leaves out sex, marry_3
model3a<-svyglm(cantafmeds~age23_3+educ_3+factor(REGION),design=subset(nhis,AGE_P>24 & MRACRPI2==1),family=quasibinomial)

## marginal means
predmarg<-svypredmeans(model3a, ~interaction(sex,marry_3))
predmarg

## contrasts of marginal means, with standard errors
svycontrast(predmarg,quote(Female.Married/Male.Married))
svycontrast(predmarg,quote(Male.Widowed/Male.Married))
svycontrast(predmarg,quote(Male.Unmarried/Male.Married))

## confidence intervals for relative risks: doesn't match
confint(svycontrast(predmarg,quote(Male.Unmarried/Male.Married)))
## because SUDAAN does it on a log scale
exp(confint(svycontrast(predmarg,quote(log(Male.Unmarried/Male.Married)))))

## example.txt
> source("~/Desktop/predmarg-example.R",echo=TRUE)

> library(survey)

> library(haven)

> ## http://www.rti.org/sudaan/pdf_files/110Example/samadulted.zip, then unzip
> d<-read_sas("~/Downloads/samadulted.sas7bdat")

> d$educ_3<-with(d, cut(EDUC1, c(-Inf,14,15,21)))

> d$age23_3<-with(d, cut(AGE_P, c(24,44,64,Inf)))

> d$marry_3<-with(d, ifelse(R_MARITL %in% c(1,2,3), 1, ifelse(R_MARITL %in% 4,2, ifelse(R_MARITL %in% 5:8,3,NA))))

> d$cantafmeds<-with(d, ifelse(AHCAFYR1==1,1,ifelse(AHCAFYR1 ==2,0,NA)))

> d$educ_3<-relevel(d$educ_3,ref="(15,21]")

> d$age23_3<-relevel(d$age23_3,ref="(64,Inf]")

> d$sex<-factor(d$SEX,labels=c("Male","Female"))

> d$marry_3<-factor(d$marry_3,labels=c("Married","Widowed","Unmarried"))

> ## design
> nhis<-svydesign(~PSU_P,strata=~STRAT_P, weights=~WTFA_SA,data=d,nest=TRUE)

> ## model, to check against http://www.rti.org/sudaan/pdf_files/110Example/Logistic%20Example%206.pdf
> model3<-svyglm(cantafmeds~sex+age23_3+educ_3+ .... [TRUNCATED]

> summary(model3)

Call:
svyglm(formula = cantafmeds ~ sex + age23_3 + educ_3 + factor(REGION) +
    marry_3 * sex, design = subset(nhis, AGE_P > 24 & MRACRPI2 ==
    1), family = quasibinomial)

Survey design:
subset(nhis, AGE_P > 24 & MRACRPI2 == 1)

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)
(Intercept)                -5.04637    0.20310 -24.847  < 2e-16 ***
sexFemale                   0.39113    0.11194   3.494 0.000552 ***
age23_3(24,44]              1.26534    0.15113   8.372 2.61e-15 ***
age23_3(44,64]              1.18223    0.14221   8.313 3.90e-15 ***
educ_3(-Inf,14]             0.89694    0.08081  11.099  < 2e-16 ***
educ_3(14,15]               0.88819    0.10274   8.645 4.00e-16 ***
factor(REGION)2             0.34526    0.12712   2.716 0.007012 **
factor(REGION)3             0.50505    0.12615   4.004 7.97e-05 ***
factor(REGION)4             0.35626    0.13812   2.579 0.010400 *
marry_3Widowed              0.74944    0.32832   2.283 0.023189 *
marry_3Unmarried            0.61621    0.11668   5.281 2.56e-07 ***
sexFemale:marry_3Widowed   -0.48401    0.35277  -1.372 0.171140
sexFemale:marry_3Unmarried  0.32104    0.14181   2.264 0.024337 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for quasibinomial family taken to be 0.9960176)

Number of Fisher Scoring iterations: 6


> ## adjustment model only: leaves out sex, marry_3
> model3a<-svyglm(cantafmeds~age23_3+educ_3+factor(REGION),design=subset(nhis,AGE_P>24 & MRACRPI2= .... [TRUNCATED]

> ## marginal means
> predmarg<-svypredmeans(model3a, ~interaction(sex,marry_3))

> predmarg
                     mean     SE
Female.Married   0.068405 0.0041
Male.Married     0.047594 0.0041
Male.Unmarried   0.083853 0.0064
Female.Unmarried 0.154317 0.0073
Male.Widowed     0.094393 0.0264
Female.Widowed   0.086931 0.0134

> ## contrasts of marginal means, with standard errors
> svycontrast(predmarg,quote(Female.Married/Male.Married))
          nlcon     SE
contrast 1.4373 0.1497

> svycontrast(predmarg,quote(Male.Widowed/Male.Married))
          nlcon     SE
contrast 1.9833 0.5769

> svycontrast(predmarg,quote(Male.Unmarried/Male.Married))
          nlcon     SE
contrast 1.7618 0.1891

> ## confidence intervals for relative risks: doesn't match
> confint(svycontrast(predmarg,quote(Male.Unmarried/Male.Married)))
            2.5 %   97.5 %
contrast 1.391145 2.132516

> ## because SUDAAN does it on a log scale
> exp(confint(svycontrast(predmarg,quote(log(Male.Unmarried/Male.Married)))))
            2.5 %   97.5 %
contrast 1.427544 2.174396
>

## svypredmeans.R
svypredmeans<-function(adjustmodel, groupfactor){

	design<-eval(bquote(update(adjustmodel$survey.design, .groupfactor=.(groupfactor[[2]]))))
	groups<-unique(model.frame(design)$.groupfactor)
	groups<-groups[!is.na(groups)]
	model<-update(adjustmodel, .~.+.groupfactor,design=design)
	w<-weights(design,"sampling")

	fits<-matrix(nrow=NROW(design),ncol=length(groups))
	dg_deta<-matrix(nrow=length(coef(model)),ncol=length(groups))
	for(i in 1:length(groups)){
		mf<-model.frame(design)
		mf$.groupfactor<-groups[i]
		mu<-predict(model,newdata=mf,type="response",se.fit=FALSE)
		eta<-predict(model,newdata=mf,type="link",se.fit=FALSE)
		fits[,i]<-coef(mu)

		mm<-model.matrix(terms(model),mf)
		dg_deta[,i]<-t(colSums(w*model$family$mu.eta(eta)*mm))/sum(w)
	}
	colnames(fits)<-as.character(groups)
	cond<-svymean(fits,design)
	addvar<-t(dg_deta)%*%vcov(model)%*%dg_deta
	vv<-addvar+attr(cond,"var")
	attr(vv,"parts")<-list(addvar,attr(cond,"var"))
	attr(cond,"var")<-vv
	cond
}
	library(survey)
	library(haven)
	## http://www.rti.org/sudaan/pdf_files/110Example/samadulted.zip, then unzip
	d<-read_sas("~/Downloads/samadulted.sas7bdat")
	d$educ_3<-with(d, cut(EDUC1, c(-Inf,14,15,21)))
	d$age23_3<-with(d, cut(AGE_P, c(24,44,64,Inf)))
	d$marry_3<-with(d, ifelse(R_MARITL %in% c(1,2,3), 1, ifelse(R_MARITL %in% 4,2, ifelse(R_MARITL %in% 5:8,3,NA))))
	d$cantafmeds<-with(d, ifelse(AHCAFYR1==1,1,ifelse(AHCAFYR1 ==2,0,NA)))

	d$educ_3<-relevel(d$educ_3,ref="(15,21]")
	d$age23_3<-relevel(d$age23_3,ref="(64,Inf]")
	d$sex<-factor(d$SEX,labels=c("Male","Female"))
	d$marry_3<-factor(d$marry_3,labels=c("Married","Widowed","Unmarried"))

	## design
	nhis<-svydesign(~PSU_P,strata=~STRAT_P, weights=~WTFA_SA,data=d,nest=TRUE)

	## model, to check against http://www.rti.org/sudaan/pdf_files/110Example/Logistic%20Example%206.pdf
	model3<-svyglm(cantafmeds~sex+age23_3+educ_3+factor(REGION)+marry_3*sex,design=subset(nhis,AGE_P>24 & MRACRPI2==1),family=quasibinomial)

	summary(model3)

	## adjustment model only: leaves out sex, marry_3
	model3a<-svyglm(cantafmeds~age23_3+educ_3+factor(REGION),design=subset(nhis,AGE_P>24 & MRACRPI2==1),family=quasibinomial)

	## marginal means
	predmarg<-svypredmeans(model3a, ~interaction(sex,marry_3))
	predmarg

	## contrasts of marginal means, with standard errors
	svycontrast(predmarg,quote(Female.Married/Male.Married))
	svycontrast(predmarg,quote(Male.Widowed/Male.Married))
	svycontrast(predmarg,quote(Male.Unmarried/Male.Married))

	## confidence intervals for relative risks: doesn't match
	confint(svycontrast(predmarg,quote(Male.Unmarried/Male.Married)))
	## because SUDAAN does it on a log scale
	exp(confint(svycontrast(predmarg,quote(log(Male.Unmarried/Male.Married)))))
	> source("~/Desktop/predmarg-example.R",echo=TRUE)

	> library(survey)

	> library(haven)

	> ## http://www.rti.org/sudaan/pdf_files/110Example/samadulted.zip, then unzip
	> d<-read_sas("~/Downloads/samadulted.sas7bdat")

	> d$educ_3<-with(d, cut(EDUC1, c(-Inf,14,15,21)))

	> d$age23_3<-with(d, cut(AGE_P, c(24,44,64,Inf)))

	> d$marry_3<-with(d, ifelse(R_MARITL %in% c(1,2,3), 1, ifelse(R_MARITL %in% 4,2, ifelse(R_MARITL %in% 5:8,3,NA))))

	> d$cantafmeds<-with(d, ifelse(AHCAFYR1==1,1,ifelse(AHCAFYR1 ==2,0,NA)))

	> d$educ_3<-relevel(d$educ_3,ref="(15,21]")

	> d$age23_3<-relevel(d$age23_3,ref="(64,Inf]")

	> d$sex<-factor(d$SEX,labels=c("Male","Female"))

	> d$marry_3<-factor(d$marry_3,labels=c("Married","Widowed","Unmarried"))

	> ## design
	> nhis<-svydesign(~PSU_P,strata=~STRAT_P, weights=~WTFA_SA,data=d,nest=TRUE)

	> ## model, to check against http://www.rti.org/sudaan/pdf_files/110Example/Logistic%20Example%206.pdf
	> model3<-svyglm(cantafmeds~sex+age23_3+educ_3+ .... [TRUNCATED]

	> summary(model3)

	Call:
	svyglm(formula = cantafmeds ~ sex + age23_3 + educ_3 + factor(REGION) +
	marry_3 * sex, design = subset(nhis, AGE_P > 24 & MRACRPI2 ==
	1), family = quasibinomial)

	Survey design:
	subset(nhis, AGE_P > 24 & MRACRPI2 == 1)

	Coefficients:
	Estimate Std. Error t value Pr(>\|t\|)
	(Intercept) -5.04637 0.20310 -24.847 < 2e-16 ***
	sexFemale 0.39113 0.11194 3.494 0.000552 ***
	age23_3(24,44] 1.26534 0.15113 8.372 2.61e-15 ***
	age23_3(44,64] 1.18223 0.14221 8.313 3.90e-15 ***
	educ_3(-Inf,14] 0.89694 0.08081 11.099 < 2e-16 ***
	educ_3(14,15] 0.88819 0.10274 8.645 4.00e-16 ***
	factor(REGION)2 0.34526 0.12712 2.716 0.007012 **
	factor(REGION)3 0.50505 0.12615 4.004 7.97e-05 ***
	factor(REGION)4 0.35626 0.13812 2.579 0.010400 *
	marry_3Widowed 0.74944 0.32832 2.283 0.023189 *
	marry_3Unmarried 0.61621 0.11668 5.281 2.56e-07 ***
	sexFemale:marry_3Widowed -0.48401 0.35277 -1.372 0.171140
	sexFemale:marry_3Unmarried 0.32104 0.14181 2.264 0.024337 *
	---
	Signif. codes: 0 ‘*’ 0.001 ‘’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

	(Dispersion parameter for quasibinomial family taken to be 0.9960176)

	Number of Fisher Scoring iterations: 6


	> ## adjustment model only: leaves out sex, marry_3
	> model3a<-svyglm(cantafmeds~age23_3+educ_3+factor(REGION),design=subset(nhis,AGE_P>24 & MRACRPI2= .... [TRUNCATED]

	> ## marginal means
	> predmarg<-svypredmeans(model3a, ~interaction(sex,marry_3))

	> predmarg
	mean SE
	Female.Married 0.068405 0.0041
	Male.Married 0.047594 0.0041
	Male.Unmarried 0.083853 0.0064
	Female.Unmarried 0.154317 0.0073
	Male.Widowed 0.094393 0.0264
	Female.Widowed 0.086931 0.0134

	> ## contrasts of marginal means, with standard errors
	> svycontrast(predmarg,quote(Female.Married/Male.Married))
	nlcon SE
	contrast 1.4373 0.1497

	> svycontrast(predmarg,quote(Male.Widowed/Male.Married))
	nlcon SE
	contrast 1.9833 0.5769

	> svycontrast(predmarg,quote(Male.Unmarried/Male.Married))
	nlcon SE
	contrast 1.7618 0.1891

	> ## confidence intervals for relative risks: doesn't match
	> confint(svycontrast(predmarg,quote(Male.Unmarried/Male.Married)))
	2.5 % 97.5 %
	contrast 1.391145 2.132516

	> ## because SUDAAN does it on a log scale
	> exp(confint(svycontrast(predmarg,quote(log(Male.Unmarried/Male.Married)))))
	2.5 % 97.5 %
	contrast 1.427544 2.174396
	>
	svypredmeans<-function(adjustmodel, groupfactor){

	design<-eval(bquote(update(adjustmodel$survey.design, .groupfactor=.(groupfactor[[2]]))))
	groups<-unique(model.frame(design)$.groupfactor)
	groups<-groups[!is.na(groups)]
	model<-update(adjustmodel, .~.+.groupfactor,design=design)
	w<-weights(design,"sampling")

	fits<-matrix(nrow=NROW(design),ncol=length(groups))
	dg_deta<-matrix(nrow=length(coef(model)),ncol=length(groups))
	for(i in 1:length(groups)){
	mf<-model.frame(design)
	mf$.groupfactor<-groups[i]
	mu<-predict(model,newdata=mf,type="response",se.fit=FALSE)
	eta<-predict(model,newdata=mf,type="link",se.fit=FALSE)
	fits[,i]<-coef(mu)

	mm<-model.matrix(terms(model),mf)
	dg_deta[,i]<-t(colSums(wmodel$family$mu.eta(eta)mm))/sum(w)
	}
	colnames(fits)<-as.character(groups)
	cond<-svymean(fits,design)
	addvar<-t(dg_deta)%%vcov(model)%%dg_deta
	vv<-addvar+attr(cond,"var")
	attr(vv,"parts")<-list(addvar,attr(cond,"var"))
	attr(cond,"var")<-vv
	cond
	}