dceoy/glm_confint.R

## glm_confint.R
#!/usr/bin/env Rscript

# Diabetes survey on Pima Indians
#
# variables:
#   'glucose'   Plasma glucose concentration at 2 hours in an oral glucose tolerance test
#   'diabetes'  Diabetes pedigree function
#   'test'      test whether the patient shows signs of diabetes (coded 0 if negative, 1 if positive)
data(pima, package = 'faraway')

#
# Logistic Regression
#
m <- glm(factor(test) ~ glucose + diabetes, family = binomial, data = pima)

print(summary(m))
#
# Call:
# glm(formula = test ~ glucose + diabetes, family = binomial, data = pima)
#
# Deviance Residuals:
#     Min       1Q   Median       3Q      Max
# -2.7557  -0.7801  -0.5295   0.8460   3.2897
#
# Coefficients:
#              Estimate Std. Error z value Pr(>|z|)
# (Intercept) -5.724331   0.447126 -12.803  < 2e-16 ***
# glucose      0.037356   0.003284  11.375  < 2e-16 ***
# diabetes     0.918678   0.273089   3.364 0.000768 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# (Dispersion parameter for binomial family taken to be 1)
#
#     Null deviance: 993.48  on 767  degrees of freedom
# Residual deviance: 796.99  on 765  degrees of freedom
# AIC: 802.99
#
# Number of Fisher Scoring iterations: 4
#

# Alpha Level
alpha <- 0.05

results <- list()

#
# Profile Likelihood CI
#
results$profile_likelihood_ci <- confint(m, level = 1 - alpha)

#
# Wald CI
#
results$wald_ci <- confint.default(m, level = 1 - alpha)

#
# Wald CI (manual calculation)
#
wald_ci <- function(model, alpha) {
  ce <- summary(model)$coefficients[,1:2]
  ci <- cbind(ce[,1] - qnorm(1 - alpha / 2) * ce[,2],
              ce[,1] + qnorm(1 - alpha / 2) * ce[,2])
  colnames(ci) <- c(paste((alpha / 2) * 100, '%'),
                    paste((1 - alpha / 2) * 100, '%'))
  return(ci)
}
results$wald_ci_manual_calc <- wald_ci(m, alpha)

#
# Odds Ratio
#
results$odds_ratio <- exp(cbind(Estimate = m$coefficients, confint(m, level = 1 - alpha)))

print(results)
#
# $profile_likelihood_ci
#                   2.5 %      97.5 %
# (Intercept) -6.62859431 -4.87394015
# glucose      0.03109338  0.04398227
# diabetes     0.38948850  1.46127936
#
# $wald_ci
#                   2.5 %      97.5 %
# (Intercept) -6.60068146 -4.84798018
# glucose      0.03091909  0.04379221
# diabetes     0.38343319  1.45392188
#
# $wald_ci_manual_calc
#                   2.5 %      97.5 %
# (Intercept) -6.60068146 -4.84798018
# glucose      0.03091909  0.04379221
# diabetes     0.38343319  1.45392188
#
# $odds_ratio
#                Estimate      2.5 %      97.5 %
# (Intercept) 0.003265538 0.00132202 0.007643191
# glucose     1.038062146 1.03158183 1.044963828
# diabetes    2.505974144 1.47622552 4.311471930
#

## glm_predict.R
#!/usr/bin/env R

sapply(c('dplyr', 'data.table', 'ggplot2'), function(p) require(p, character.only = TRUE))
select <- dplyr::select

# Diabetes survey on Pima Indians
data(pima, package = 'faraway')

#
# Logistic Regression
#
m <- glm(factor(test) ~ glucose + diabetes, family = binomial, data = pima)

# Alpha Level
alpha = 0.05

#
# New Data having various values of Glucose and Diabetes
#
new_d <- with(pima, data.table(glucose = rep(quantile(pima$glucose)[2:4], each = 100),
                               diabetes = rep(seq(from = min(diabetes),
                                                  to = max(diabetes),
                                                  length.out = 100)), 3))

#
# Prediction of Probabilities
#
prd_p <- new_d %>%
           predict(m, newdata = ., type = 'link', se.fit = TRUE) %>%
           as.data.table() %>%
           mutate(predicted_prob = plogis(fit),
                  lower_limit = plogis(fit - qnorm(1 - alpha / 2) * se.fit),
                  upper_limit = plogis(fit + qnorm(1 - alpha / 2) * se.fit)) %>%
           cbind(new_d, .) %>%
           select(glucose, diabetes, predicted_prob, lower_limit, upper_limit)

#
# Visualization
#
prd_p$glucose <- factor(prd_p$glucose)

ribbon <- ggplot(prd_p, aes(x = diabetes, y = predicted_prob)) +
            geom_ribbon(aes(ymin = lower_limit, ymax = upper_limit, fill = glucose), alpha = 0.2) +
            geom_line(aes(colour = glucose), size = 1) +
            labs(x = 'diabetes pedigree function', y = 'test of diabetes')

svg('plot.svg', width = 6, height = 4)
plot(ribbon)
dev.off()

## plot.svg

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              plot.svg
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	#!/usr/bin/env Rscript

	# Diabetes survey on Pima Indians
	#
	# variables:
	# 'glucose' Plasma glucose concentration at 2 hours in an oral glucose tolerance test
	# 'diabetes' Diabetes pedigree function
	# 'test' test whether the patient shows signs of diabetes (coded 0 if negative, 1 if positive)
	data(pima, package = 'faraway')

	#
	# Logistic Regression
	#
	m <- glm(factor(test) ~ glucose + diabetes, family = binomial, data = pima)

	print(summary(m))
	#
	# Call:
	# glm(formula = test ~ glucose + diabetes, family = binomial, data = pima)
	#
	# Deviance Residuals:
	# Min 1Q Median 3Q Max
	# -2.7557 -0.7801 -0.5295 0.8460 3.2897
	#
	# Coefficients:
	# Estimate Std. Error z value Pr(>\|z\|)
	# (Intercept) -5.724331 0.447126 -12.803 < 2e-16 ***
	# glucose 0.037356 0.003284 11.375 < 2e-16 ***
	# diabetes 0.918678 0.273089 3.364 0.000768 ***
	# ---
	# Signif. codes: 0 ‘*’ 0.001 ‘’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	#
	# (Dispersion parameter for binomial family taken to be 1)
	#
	# Null deviance: 993.48 on 767 degrees of freedom
	# Residual deviance: 796.99 on 765 degrees of freedom
	# AIC: 802.99
	#
	# Number of Fisher Scoring iterations: 4
	#

	# Alpha Level
	alpha <- 0.05

	results <- list()

	#
	# Profile Likelihood CI
	#
	results$profile_likelihood_ci <- confint(m, level = 1 - alpha)

	#
	# Wald CI
	#
	results$wald_ci <- confint.default(m, level = 1 - alpha)

	#
	# Wald CI (manual calculation)
	#
	wald_ci <- function(model, alpha) {
	ce <- summary(model)$coefficients[,1:2]
	ci <- cbind(ce[,1] - qnorm(1 - alpha / 2) * ce[,2],
	ce[,1] + qnorm(1 - alpha / 2) * ce[,2])
	colnames(ci) <- c(paste((alpha / 2) * 100, '%'),
	paste((1 - alpha / 2) * 100, '%'))
	return(ci)
	}
	results$wald_ci_manual_calc <- wald_ci(m, alpha)

	#
	# Odds Ratio
	#
	results$odds_ratio <- exp(cbind(Estimate = m$coefficients, confint(m, level = 1 - alpha)))

	print(results)
	#
	# $profile_likelihood_ci
	# 2.5 % 97.5 %
	# (Intercept) -6.62859431 -4.87394015
	# glucose 0.03109338 0.04398227
	# diabetes 0.38948850 1.46127936
	#
	# $wald_ci
	# 2.5 % 97.5 %
	# (Intercept) -6.60068146 -4.84798018
	# glucose 0.03091909 0.04379221
	# diabetes 0.38343319 1.45392188
	#
	# $wald_ci_manual_calc
	# 2.5 % 97.5 %
	# (Intercept) -6.60068146 -4.84798018
	# glucose 0.03091909 0.04379221
	# diabetes 0.38343319 1.45392188
	#
	# $odds_ratio
	# Estimate 2.5 % 97.5 %
	# (Intercept) 0.003265538 0.00132202 0.007643191
	# glucose 1.038062146 1.03158183 1.044963828
	# diabetes 2.505974144 1.47622552 4.311471930
	#
	#!/usr/bin/env R

	sapply(c('dplyr', 'data.table', 'ggplot2'), function(p) require(p, character.only = TRUE))
	select <- dplyr::select

	# Diabetes survey on Pima Indians
	data(pima, package = 'faraway')

	#
	# Logistic Regression
	#
	m <- glm(factor(test) ~ glucose + diabetes, family = binomial, data = pima)

	# Alpha Level
	alpha = 0.05

	#
	# New Data having various values of Glucose and Diabetes
	#
	new_d <- with(pima, data.table(glucose = rep(quantile(pima$glucose)[2:4], each = 100),
	diabetes = rep(seq(from = min(diabetes),
	to = max(diabetes),
	length.out = 100)), 3))

	#
	# Prediction of Probabilities
	#
	prd_p <- new_d %>%
	predict(m, newdata = ., type = 'link', se.fit = TRUE) %>%
	as.data.table() %>%
	mutate(predicted_prob = plogis(fit),
	lower_limit = plogis(fit - qnorm(1 - alpha / 2) * se.fit),
	upper_limit = plogis(fit + qnorm(1 - alpha / 2) * se.fit)) %>%
	cbind(new_d, .) %>%
	select(glucose, diabetes, predicted_prob, lower_limit, upper_limit)

	#
	# Visualization
	#
	prd_p$glucose <- factor(prd_p$glucose)

	ribbon <- ggplot(prd_p, aes(x = diabetes, y = predicted_prob)) +
	geom_ribbon(aes(ymin = lower_limit, ymax = upper_limit, fill = glucose), alpha = 0.2) +
	geom_line(aes(colour = glucose), size = 1) +
	labs(x = 'diabetes pedigree function', y = 'test of diabetes')

	svg('plot.svg', width = 6, height = 4)
	plot(ribbon)
	dev.off()