Last active
March 23, 2017 08:48
-
-
Save kkholst/1cedd0185219511189a7c130d7cf78ae to your computer and use it in GitHub Desktop.
Epidemiology
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Solutions to selected exercises... | |
ihd: Exercise 2.3 | |
bcg: Exercise 2.4 | |
estradiol.sas: Mediation exercise | |
estradiol.R : Mediation exercise |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data bcgdata; | |
filename bcgfile url 'http://publicifsv.sund.ku.dk/~kkho/data/bcgalldata.txt'; | |
infile bcgfile firstobs=2; | |
input age scar status $ n; | |
run; | |
proc print data=bcgdata (obs=50) ; | |
run ; | |
/* | |
To do a particular analysis we must select cases and a particular | |
set of controls with the where-statement, so the first analysis is | |
with the total set of controls. | |
We use the estimate statements to give the ORs of leprosy between | |
age-classes, using both the first and last age-class as reference. | |
Note that the s.e. of the log-odds-ratio is very large when we use | |
the first age-class with only 2 cases as reference. | |
*/ | |
proc genmod data=bcgdata; | |
where status='case' or status='conall' ; | |
class age scar; | |
model status = age scar / dist=bin type3 ; | |
weight n; | |
estimate "BCG scar | |
" scar -1 0 / exp ; | |
estimate "Age 2 vs. 1" age -1 1 0 0 0 0 0 / exp ; | |
estimate "Age 3 vs. 1" age -1 0 1 0 0 0 0 / exp ; | |
estimate "Age 4 vs. 1" age -1 0 0 1 0 0 0 / exp ; | |
estimate "Age 5 vs. 1" age -1 0 0 0 1 0 0 / exp ; | |
estimate "Age 6 vs. 1" age -1 0 0 0 0 1 0 / exp ; | |
estimate "Age 7 vs. 1" age -1 0 0 0 0 0 1 / exp ; | |
run; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Uncomment these to install some dependencies | |
## install.packages("devtools") | |
## devtools::load_all("kkholst/lava") | |
## install.packages("mets") | |
library(mets) | |
## Load the data: | |
estradiol <- read.table("http://publicifsv.sund.ku.dk/~kkho/data/estradiol.txt",header=TRUE) | |
##Alternatively, load data from local subdirectory 'data': | |
##estradiol <- read.table("data/estradiol.txt",header=TRUE) | |
## Direct effects | |
l1 <- lm(neocort ~ testosterone+estradiol+age, data=estradiol) | |
summary(l1) | |
l2 <- lm(estradiol ~ testosterone+age, data=estradiol) | |
summary(l2) | |
## Joint model | |
m <- lvm(neocort~estradiol+testosterone+age, | |
estradiol~testosterone+age) | |
e <- estimate(m,estradiol) | |
coef(effects(e, neocort~testosterone)) | |
## Mediation proportion | |
estimate(e, function(x,...) { | |
ind <- x["neocort~estradiol"]*x["estradiol~testosterone"] | |
dir <- x["neocort~testosterone"] | |
total <- ind+dir | |
medprop <- ind/total | |
return(c(indirect=ind,direct=dir,total=total,mediationproportion=medprop)) | |
}, vcov=vcov(e)) | |
## Bootstrap confidence limits | |
f <- function(object,...) coef(effects(object,neocort~testosterone))[,1] | |
b <- bootstrap(e, fun=f, R=100, estimator="glm", mc.cores=4) | |
b |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data estradiol; | |
filename efile url "http://publicifsv.sund.ku.dk/~kkho/data/estradiol.txt"; | |
infile efile firstobs=2; | |
input neocort estra testo age mr; | |
run; | |
proc genmod data=estradiol plots=none; | |
model neocort = testo estra age /cl; | |
run; quit; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
* The grouped data (slightly altered from the book by C&H) ; | |
data ihdfreq; | |
input eksp agr pyrs cases; | |
lpyrs = log( pyrs ); | |
datalines; | |
1 0 346.87 2 | |
1 1 979.34 12 | |
1 2 699.14 14 | |
0 0 560.13 4 | |
0 1 1127.70 6 | |
0 2 794.15 8 | |
; | |
run; | |
* Make a Poisson regression of the rates ; | |
proc genmod data = ihdfreq ; | |
class eksp agr ; | |
model cases = eksp agr | |
/ dist = poisson offset = lpyrs type3 ; | |
estimate "low vs. high" eksp -1 1 / Exp ; | |
estimate "Rate E1 A0" intercept 1 eksp 1 0 agr 1 0 0 / Exp ; | |
run; | |
/* Q2 | |
Read the individual records from the diet data | |
*/ | |
data ihdindiv ; | |
* filename dietfile "/home/bendix/teach/Epi/KU-epi/data/diet.txt" ; | |
filename dietfile url "http://publicifsv.sund.ku.dk/~pka/epidata/diet.txt"; | |
infile dietfile firstobs=2 ; | |
input id doe dox chd dob job month energy height weight fat fibre ; | |
* This is to read the date variables correctly in and print them reabably ; | |
informat doe dox dob mmddyy10.; | |
format doe dox dob ddmmyy10.; | |
* We must compute the exposure variable ; | |
exposure = ( energy < 2.75 ) ; | |
drop job month energy height weight fat fibre ; | |
run ; | |
* Take a look at the individual records ; | |
proc print data = ihdindiv (obs=20) ; | |
run ; | |
/* Q3 | |
Include the two macros for time-splitting and tabulation ; | |
*/ | |
options source2 ; | |
filename Lexispr url "http://BendixCarstensen.com/Lexis/Lexis.sas"; | |
%inc Lexispr ; | |
filename PYtabpr url "http://BendixCarstensen.com/Lexis/PYtab.sas"; | |
%inc PYtabpr ; | |
/* Q4 | |
Timesplitting using the Lexis macro | |
*/ | |
%Lexis( data = ihdindiv, /* Dataset with original data */ | |
out = ihdsplit, /* Dataset with time-split data */ | |
entry = doe, /* Date of entry */ | |
exit = dox, /* Date of exit */ | |
fail = chd, /* Event (failure) indicator */ | |
origin = dob, /* Origin of the time-scale */ | |
scale = 365.25, /* Convert from input scale to braks-scale */ | |
breaks = 40 to 70 by 10, /* Where to split the time scale */ | |
left = agr ); /* The name of the new age-variable */ | |
/* Q6 | |
We can use proc print to inspect the resulting records | |
*/ | |
proc print data = ihdsplit (obs=20) ; | |
run ; | |
/* Q7 | |
We can use %PYtab to tabulate person-years and detahs: | |
*/ | |
%PYtab( data = ihdsplit, | |
class = exposure agr, | |
fail = chd, | |
risk = risk, | |
scale = 1000 ) ; | |
/* Q7a | |
But we can also use proc tabulate to replicate the table from C&H | |
in a readable form | |
*/ | |
proc tabulate data=ihdsplit missing noseps formchar=" ---- ---" ; | |
class agr exposure ; | |
var risk chd ; | |
table agr="Age" all="Total", | |
( exposure="Exposure" all="Total" ) * | |
sum=" " * ( chd="D"*f=6. risk="Y"*f=8.1 ) | |
/ rts = 15 ; | |
run ; | |
/* Q8 | |
Now fit the model to the time-split data - results are as for the | |
grouped data */ | |
proc genmod data = ihdsplit ; | |
class agr exposure ; | |
model chd = agr exposure / dist=poisson offset=lrisk ; | |
run ; | |
/* Q9 | |
Add an interaction between age and exposure and check that you | |
get the same test for interaction as with the grouped data. | |
*/ | |
proc genmod data = ihdsplit ; | |
class agr exposure ; | |
model chd = agr exposure agr*exposure / dist=poisson offset=lrisk type3 ; | |
run ; | |
/* Q10 | |
We fit the main effects model to the grouped data (6 obs-dataset) | |
and observe that the deviance is the same as the type-3 test for | |
interaction based on the individual data | |
*/ | |
proc genmod data = ihdfreq ; | |
class eksp agr ; | |
model cases = eksp agr | |
/ dist = poisson offset = lpyrs type3 ; | |
run; | |
/* We can split in smaller intervals and model age continuously */ | |
%Lexis( data = ihdindiv, /* Dataset with original data */ | |
out = ihdsplit, /* Dataset with time-split data */ | |
entry = doe, /* Date of entry */ | |
exit = dox, /* Date of exit */ | |
fail = chd, /* Event (failure) indicator */ | |
origin = dob, /* Origin of the time-scale */ | |
scale = 365.25, /* Convert from input scale to braks-scale */ | |
breaks = 40 to 70 by 1, /* Where to split the time scale */ | |
left = age ); /* The name of the new age-variable */ | |
/* And then use a spline model for the effect of age */ | |
data ihdsplit ; | |
set ihdsplit ; | |
a45 = max( 0, age-45 ) ; | |
a50 = max( 0, age-50 ) ; | |
a55 = max( 0, age-55 ) ; | |
a60 = max( 0, age-60 ) ; | |
a65 = max( 0, age-65 ) ; | |
run ; | |
/* Making predictions requires extra observations with missing chd */ | |
data ihdsplit ; | |
set ihdsplit end = last ; | |
prr = 0 ; * Indicator for prediction part of data ; | |
output ; | |
if last then | |
do exposure = 0, 1 ; | |
do age = 40 to 70 by 1 ; | |
prr = 1 ; * Indicator for prediction part of data ; | |
chd = . ; | |
a45 = max( 0, age-45 ) ; | |
a50 = max( 0, age-50 ) ; | |
a55 = max( 0, age-55 ) ; | |
a60 = max( 0, age-60 ) ; | |
a65 = max( 0, age-65 ) ; | |
output ; | |
end ; | |
end ; | |
run ; | |
/* Model using linear splines, outputting predictions */ | |
proc genmod data = ihdsplit ; | |
class exposure ; | |
model chd = age a45 a50 a55 a60 a65 exposure | |
/ dist=poisson offset=lrisk type3 ; | |
output out = plrate ( where = (prr=1) ) | |
xbeta = lograte | |
stdxbeta = selograte ; | |
run ; | |
data plrate ; | |
set plrate ; | |
* rates and confidence intervals ; | |
rate = exp( lograte ) * 1000 ; | |
rlo = exp( lograte - 1.96*selograte ) * 1000 ; | |
rhi = exp( lograte + 1.96*selograte ) * 1000 ; | |
* change to missing for expoure 0 / 1 ; | |
e1 = 1 ; e0 = 1 ; | |
if exposure = 0 then e1 = . ; else e0 = . ; | |
rate0 = rate*e0 ; rlo0 = rlo*e0 ; rhi0 = rhi*e0 ; | |
rate1 = rate*e1 ; rlo1 = rlo*e1 ; rhi1 = rhi*e1 ; | |
run ; | |
proc print data = plrate ; | |
where age < 46 ; | |
run ; | |
proc sgplot data = plrate ; | |
series x=age y=rate0 / lineattrs=(color=blue) legendlabel="un-exposed" name="r0"; | |
band x=age lower=rlo0 upper=rhi0 / fillattrs=(color=blue transparency=.9); | |
series x=age y=rate1 / lineattrs=(color=red pattern=dash) legendlabel="exposed" name="r1"; | |
band x=age lower=rlo1 upper=rhi1 / fillattrs=(color=red transparency=0.9); | |
xaxis label="Age" GRID VALUES = (40 to 70 by 5); | |
yaxis type=log logbase=10 logstyle=logexponent logvtype=expanded values=(1,2,5,10,20,50,100); | |
keylegend "r0" "r1"; | |
run ; | |
/* proc gplot data = plrate ; */ | |
/* plot ( rate0 rlo0 rhi0 ) * age / */ | |
/* overlay haxis=axis1 vaxis=axis2 ; * un-exposed ; */ | |
/* plot2 ( rate1 rlo1 rhi1 ) * age / */ | |
/* overlay haxis=axis1 vaxis=axis2 ; * exposed ; */ | |
/* axis1 order=(40 to 70 by 5) ; */ | |
/* axis2 logbase=10 logstyle=expand order=(1,2,5,10,20,50,100) ; */ | |
/* symbol1 w=5 i=join color=blue ; */ | |
/* symbol2 w=2 i=join color=blue ; */ | |
/* symbol3 w=2 i=join color=blue ; */ | |
/* symbol4 w=5 i=join color=red ; */ | |
/* symbol5 w=2 i=join color=red ; */ | |
/* symbol6 w=2 i=join color=red ; */ | |
/* run ; */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Data from Clayton & Hills Table 22.6, p. 221 */ | |
## Alternatively, read | |
ihd <- read.table("http://publicifsv.sund.ku.dk/~kkho/data/ihd-tab.txt",header=TRUE) | |
## Alternatively, input the data directly: | |
ihd <- data.frame(exposed=c(1,1,1,0,0,0), | |
age=c("40-49","50-59","60-69","40-49","50-59","60-69"), | |
pyears=c(311.9,878.1,667.5,607.9,1272.1,888.9), | |
cases=c(2,12,14,4,5,8)) | |
ihd$age <- relevel(ihd$age, ref="60-69") ## Use last (older) group as reference | |
## Model with constant rate within both exposure groups (Table 13.1) | |
g0 <- glm(cases ~ exposed+offset(log(pyears)),data=ihd,family=poisson) | |
summary(g0) | |
## Direct calculations gives exact same results: | |
y1 <- sum(subset(ihd,exposed==1)$pyears) | |
d1 <- sum(subset(ihd,exposed==1)$cases) | |
y0 <- sum(subset(ihd,exposed==0)$pyears) | |
d0 <- sum(subset(ihd,exposed==0)$cases) | |
r0 <- d0/y0 ## Rate in unexposed | |
r1 <- d1/y1 ## Rate in exposed | |
se0 <- 1/sqrt(d0) ## SE of log(r0) | |
se1 <- 1/sqrt(d1) ## SE of log(r0) | |
logrr = log(r1)-log(r0) ## Rate-ratio | |
selogrr <- (1/d0+1/d1)^.5 ## SE of RR | |
cbind(logr0=log(r0), logr0.se=se0, logrr=logrr, logrr=selogrr) | |
## Compare with poisson regression: | |
summary(g0) | |
## Q1: | |
## We fit the model with main effects of age and exposure, and we see | |
## we obtain the same estimates as in Clayton & Hill's book | |
g1 <- glm(cases ~ age + exposed+offset(log(pyears)),data=ihd,family=poisson) | |
summary(g1) | |
## Model without exposure effect | |
g2 <- glm(cases ~ age + offset(log(pyears)),data=ihd,family=poisson) | |
## LRT: | |
Q <- -2*(logLik(g2)-logLik(g1)) | |
1-pchisq(Q,df=1) | |
## Wald test: 0.0048, LRT: 0.0040 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
First read in the data as from the table in the book | |
*/ | |
data ihd; | |
input exposed age pyrs cases; | |
lpyrs=log(pyrs); | |
cards; | |
0 2 311.9 2 | |
0 1 878.1 12 | |
0 0 667.5 14 | |
1 2 607.9 4 | |
1 1 1272.1 5 | |
1 0 888.9 8 | |
; | |
run; | |
proc format; | |
value exposedfmt 0='exposed(<2750kcals)' | |
1='unexposed(>=2750kcals)'; | |
value agefmt 0='60-69' | |
1='50-59' | |
2='40-49'; | |
run; | |
/* Table 22.6 in Clayton & Hills */ | |
proc print data=ihd; | |
format age agefmt. exposed exposedfmt.; | |
run; | |
/* | |
* Comparison of rate within exposed and unexposed (see Table 13.1 Clayton & Hills) | |
*/ | |
proc genmod data=ihd; | |
format exposed exposedfmt.; | |
class exposed; | |
model cases=exposed / dist=poisson offset=lpyrs; | |
estimate "exp. vs. non-exp." exposed 1 -1 / exp; | |
run; | |
/* For comparison we see that we get the same estimates and standard errors using the usual formulas: */ | |
data ihdest; | |
pyears1=311.9+878.1+667.5; | |
pyears0=607.9+1272.1+888.9; | |
cases1=2+12+14; | |
cases0=4+5+8; | |
r1 = cases1/pyears1; /* rate in exposed */ | |
r0 = cases0/pyears0; /* rate in unexposed */ | |
lr0 =log(r0); | |
lrr =log(r1)-log(r0); /* log rate-ratio */ | |
lrr_se=sqrt(1/cases1+1/cases0); | |
put "log(RR) = " lrr ", S.E = " lrr_se; | |
run; | |
/* | |
Q1: | |
We fit the model with main effects of age and exposure, and we see | |
we obtain the same estimates as in Clayton \& Hill's book | |
*/ | |
proc genmod data=ihd; | |
format exposed exposedfmt. age agefmt.; | |
class exposed age; | |
model cases=exposed age / dist=poisson offset=lpyrs type3; | |
estimate "exp. vs. non-exp." exposed 1 -1 / exp; | |
estimate "50-59 vs. 40-49" | |
age 0 1 -1 / exp; | |
estimate "60-69 vs. 40-49" | |
age 1 0 -1 / exp; | |
run; | |
/* model without exposure effect */ | |
proc genmod data=ihd; | |
format age agefmt.; | |
class age; | |
model cases=age / dist=poisson offset=lpyrs type3; | |
run; | |
/* LRT */ | |
data ihdest; | |
Q=-2*(-16.0477-(-11.8982)); | |
pval=1-cdf("chisq",Q,1); | |
put "P-value (LRT): " pval; | |
run; | |
/* Wald test: 0.0048, LRT: 0.0040. */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data twe; | |
filename testfile url "http://www.biostat.ku.dk/~pka/epidata/testis.txt"; | |
* filename testfile "/folders/myfolders/testis.txt"; | |
infile testfile firstobs=2; | |
input SON_AGE SON_KOH MOTH_AGE PARITY PYRS NONSEMI SEMI CASES; | |
lpyrs=log(pyrs); | |
par2 = (parity>=2); | |
* Merging the the first two cohorts ; | |
if son_koh=1973 then son_koh=1968; | |
* Grouping cohort to 3 levels ; | |
if son_koh=1950 or son_koh=1958 then son_kohny=1; | |
else if son_koh=1963 then son_kohny=2; | |
else if son_koh=1968 or son_koh=1973 then son_kohny=3; | |
run; | |
/* | |
Q10 | |
*/ | |
title "Q10 Crude RR for parity 2+ vs 1"; | |
proc genmod data=twe; | |
class par2; | |
model cases = PAR2 / offset=lpyrs dist=poisson type3 ; | |
estimate "par 2+ vs par 1" PAR2 -1 1 / exp ; | |
run; | |
/* | |
Q11 | |
*/ | |
title "Q11a All testis cancers"; | |
proc genmod data=twe ; | |
class SON_AGE SON_KOH MOTH_AGE parity; | |
model cases = SON_AGE SON_KOH MOTH_AGE PARity | |
/ offset=lpyrs dist=poi type3; | |
estimate "par 2 vs. par 1" parity -1 1 0 0 /exp; | |
estimate "par 3 vs. par 1" parity -1 0 1 0 /exp; | |
estimate "par 4+ vs. par 1" parity -1 0 0 1 /exp; | |
run; | |
title "Q11b Non seminoma testis cancers"; | |
proc genmod data=twe; | |
class SON_AGE SON_KOH MOTH_AGE parity; | |
model nonsemi = SON_AGE SON_KOH MOTH_AGE PARity | |
/ offset=lpyrs dist=poi type3; | |
estimate "par 2 vs. par 1" parity -1 1 0 0 /exp; | |
estimate "par 3 vs. par 1" parity -1 0 1 0 /exp; | |
estimate "par 4+ vs. par 1" parity -1 0 0 1 /exp; | |
run; | |
/* Q12 | |
Estimates concerning Mother"s age at first birth | |
*/ | |
title "Q12: Mother"s age at boy"s birth - test of interaction" ; | |
proc genmod data=twe; | |
class SON_AGE SON_KOH MOTH_AGE par2; | |
model cases = SON_AGE SON_KOH MOTH_AGE par2 moth_age*par2 | |
/ offset=lpyrs dist=poi type3 ; | |
run; | |
title "Q12: Mother"s age at boy"s birth - interaction effects" ; | |
proc genmod data=twe; | |
class SON_AGE SON_KOH MOTH_AGE par2; | |
model cases = SON_AGE SON_KOH MOTH_AGE moth_age*par2 | |
/ offset=lpyrs dist=poi type3; | |
estimate "par2+ vs. 1, <20" moth_age*par2 -1 1 0 0 0 0 0 0 /exp; | |
estimate "par2+ vs. 1, 20-24" moth_age*par2 0 0 -1 1 0 0 0 0 /exp; | |
estimate "par2+ vs. 1, 25-29" moth_age*par2 0 0 0 0 -1 1 0 0 /exp; | |
estimate "par2+ vs. 1, 30+ " moth_age*par2 0 0 0 0 0 0 -1 1 /exp; | |
run; | |
/* Q13 | |
Estimates concerning Birth cohort of the son | |
*/ | |
title "Q13 Birth cohort of the son - test of interaction" ; | |
proc genmod data=twe ; | |
class SON_AGE MOTH_AGE son_kohny son_koh par2; | |
model cases=SON_AGE MOTH_AGE son_koh par2 son_kohny*par2 | |
/offset=lpyrs dist=poi type3; | |
run; | |
title "Q13 Birth cohort of the son - interaction effects" ; | |
proc genmod data=twe ; | |
class SON_AGE MOTH_AGE son_kohny son_koh par2; | |
model cases=SON_AGE MOTH_AGE son_koh son_kohny*par2 | |
/offset=lpyrs dist=poi type3; | |
estimate "par 2+ vs. 1, 1950-63" son_kohny*par2 -1 1 0 0 0 0 /exp; | |
estimate "par 2+ vs. 1, 1963-67" son_kohny*par2 0 0 -1 1 0 0 /exp; | |
estimate "par 2+ vs. 1, 1968-92" son_kohny*par2 0 0 0 0 -1 1 /exp; | |
run; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment