Skip to content

Instantly share code, notes, and snippets.

@sebnyberg
Created May 11, 2017 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sebnyberg/038697a0f4db56bec13f9cfe86fa6645 to your computer and use it in GitHub Desktop.
Save sebnyberg/038697a0f4db56bec13f9cfe86fa6645 to your computer and use it in GitHub Desktop.
# ========================================
# ==============[ Q 3.1.1 ]===============
# ========================================
# load data
load('hilda.Rdata')
source('helpers.R')
# redifine the hospitaldays to be 1 for anything above 1
hilda$hospdays[(hilda$hospdays >= 1)] = 1
hilda$hospdays
# redefine all answers which were "other" or "not answered" to be NA
hilda$health[(hilda$health >= 4)] = NA
hilda$health <- factor(hilda$health, labels=c("good", "bad", "inbetween"))
hilda$health
# check that all covariates are sigificantly different from zero
model.no.ref <- glm(hilda$hospdays ~ hilda$health - 1, family="binomial")
# ^^^^^^ this makes categories non-relative to the reference
summary(model.no.ref)
# they are significantly different from zero
# lets try a model with healthgood as a reference category
model.ref.good <- glm(hilda$hospdays ~ hilda$health, family="binomial")
summary(model.ref.good)
# lets check if healthbad is significantly different from health inbetween
model.ref.bad <- glm(hilda$hospdays ~ factor(hilda$health, levels=c("bad","inbetween","good")), family="binomial")
summary(model.ref.bad)
# inbetween is not significantly different from bad (the reference category)...
# we should consider using a model which merges the categories.
# Lets check the ANOVA for the merged vs split model.
# create merged health
healthmerged <- hilda$health
levels(healthmerged) <- c("good", "not good", "not good")
# compare models
model.original <- glm(hilda$hospdays ~ hilda$health, family=binomial)
model.merged <- glm(hilda$hospdays ~ healthmerged, family=binomial)
# anova will use a likelihood ratio test to compare the models
anova(model.merged, model.original)
qchisq(1-0.05,1)
# since 3.444 < 3.841459 we can not reject the null hypothesis (that the two models are the same)
# thus, merging the models does not result in a loss or improvement either way
model.health <- model.merged
# merge the groups, since the Pr(>|z|) was high (0.6)
hilda$health <- healthmerged
# ========================================
# ==============[ Q 3.1.2 ]===============
# ========================================
# since the model coefficients are w.r.t. the reference category (good)
# the odds ratio for people with not good as compared to good is
ln.or.notgood <- model.health$coefficients[1]
or.notgood <- exp(ln.or.notgood)
# the S.E. is the second diagonal element in the covariance matrix
# of our model
se <- sqrt(diag(summary(model.health)$cov.unscaled))
se.or.notgood <- se[2]
# calculate bounds
lo <- ln.or.notgood - 1.96 * se.or.notgood
hi <- ln.or.notgood + 1.96 * se.or.notgood
# calculate confidence interval for
ci.or.notgood.lo <- exp(lo)
ci.or.notgood.hi <- exp(hi)
# ========================================
# ==============[ Q 3.1.3 ]===============
# ========================================
beta0 <- model.health$coefficients[1]
beta1 <- model.health$coefficients[2]
ln.odds.notgood <- beta0 + beta1
odds.notgood <- exp(ln.odds.notgood)
se.notgood <- sqrt(se[2]^2 - se[1]^2)
# calculate bounds
lo.notgood <- ln.odds.notgood - 1.96 * se.notgood
hi.notgood <- ln.odds.notgood + 1.96 * se.notgood
# calculate confidence interval for odds of not good
ci.notgood.lo <- exp(lo.notgood)
ci.notgood.hi <- exp(hi.notgood)
# ========================================
# ==============[ Q 3.1.4 ]===============
# ========================================
# probability of having at least one hospital day for people with good health:
odds.good <- exp(beta0)
prob.good <- odds.good / (1 + odds.good)
se.good <- se[1]
lo.good <- beta0 - 1.96 * se.good
hi.good <- beta0 + 1.96 * se.good
ci.prob.good.lo <- exp(lo.good) / (1 + exp(lo.good))
ci.prob.good.hi <- exp(hi.good) / (1 + exp(hi.good))
# for not good health
prob.notgood <- odds.notgood / (1 + odds.notgood)
ci.prob.notgood.lo <- exp(lo.notgood) / (1 + exp(lo.notgood))
ci.prob.notgood.hi <- exp(hi.notgood) / (1 + exp(hi.notgood))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment