Created
October 18, 2015 13:20
-
-
Save dreidpath/76bc76a1cecb1d454801 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################################################################################## | |
# Analysis of Blood glucose / HbA1C data to show that y ~ x is not the same as x ~ y. # | |
# Author: Daniel D Reidpath, Pascale Allotey, & Mark R Diamond # | |
# Date: 18 October 2015 # | |
########################################################################################################## | |
# This analysis supports a research note (doi: 10.4225/03/56239BD13C5E0) | |
# | |
# | |
# Load the data. These are derived from Daramola (2012) and held on the CMU StatLib---Datasets Archive | |
data <- read.csv('http://lib.stat.cmu.edu/datasets/hba1c_bloodGlucose.dat', header=T, sep='\t', skip=30) | |
# Rename the random blood glucose variable 'mmol' -- it keeps the unit of measure straight in my head | |
names(data)[2] <- 'rgb_mmol' | |
# Create an HbA1c variable as mmol/mol | |
data$mmolmol <- (data$hba1c-2.15)*10.929 # HbA1c expressed in IFCC units | |
# Set the random seed, so that anyone replicating the analysis will get the same results | |
set.seed(seed=12146) | |
# Randomly sample 20 of the 349 patients ... it will be easier to see what's happening in the graph with 20 | |
data_20 <- data[sample(1:nrow(data), 20, replace=F), ] | |
############## | |
## Figure 1 ## | |
############## | |
# Plot the sample of 20 data points | |
plot(data_20$rgb_mmol, data_20$mmolmol, | |
xlab='Blood Glucose (mmol/L)', xlim=c(0,30), | |
ylab='', ylim=c(0,200)) | |
# Place the y-axis label nicely | |
mtext(side=2, line=2.3, expression(paste('HbA'['1c'],' (mmol/mol)'))) | |
# Calculate the OLS regression model for hba1c ~ blood glucose | |
xy.model <- lm(data_20$mmolmol ~ data_20$rgb_mmol) | |
# Place the line of best fit | |
abline(xy.model, lty=1) | |
# Calculate the OLS regression model for blood glucose ~ hba1c | |
yx.model <- lm(data_20$rgb_mmol ~ data_20$mmolmol) | |
# Place the line of best fit | |
lines(c((-10*yx.model$coef[2]+yx.model$coef[1]), (220*yx.model$coef[2]+yx.model$coef[1])),c(-10, 220), lty=2) | |
# Demonstrate residuals y_e | |
points(data_20$rgb_mmol[6], data_20$mmolmol[6], pch=20, col='Black') | |
lines(c(data_20$rgb_mmol[6], (data_20$mmolmol[6]*yx.model$coef[2] + yx.model$coef[1])), | |
c(data_20$mmolmol[6], data_20$mmolmol[6]), | |
lty=3, col='Black', lwd=1) | |
text(21, 137, expression("y"[e])) | |
# Demonstrate residuals x_e | |
points(data_20$rgb_mmol[2], data_20$mmolmol[2], pch=20, col='Black') | |
lines(c(data_20$rgb_mmol[2], data_20$rgb_mmol[2]), | |
c(data_20$mmolmol[2], data_20$rgb_mmol[2]*xy.model$coef[2] + xy.model$coef[1]), | |
lty=3, col='Black', lwd=1) | |
text(28, 69.4, expression("x"[e])) | |
# Note that the R_sqr for the two models is identical | |
summary(xy.model) | |
summary(yx.model) | |
############## | |
## Figure 2 ## | |
############## | |
# Plot all 349 data points in light grey | |
plot(data$rgb_mmol, data$mmolmol, | |
xlab='Blood Glucose (mmol/L)', xlim=c(0,50), | |
ylab='', | |
ylim=c(0,250), | |
col='Grey', pch=20) | |
# Place the y-axis label nicely | |
mtext(side=2, line=2.3, expression(paste('HbA'['1c'],' (mmol/mol)'))) | |
# Calculate the OLS regression model for hba1c ~ blood glucose | |
xy.model <- lm(data$mmolmol ~ data$rgb_mmol) | |
# Place the line of best fit | |
abline(xy.model, lty=1) | |
# Calculate the OLS regression model for blood glucose ~ hba1c | |
yx.model <- lm(data$rgb_mmol ~ data$mmolmol) | |
# Place the line of best fit | |
lines(c((-10*yx.model$coef[2]+yx.model$coef[1]), (300*yx.model$coef[2]+yx.model$coef[1])),c(-10, 300), lty=2) | |
# Mark the approximate point of intersection of the two lines | |
points(13.2, 80, pch=15, col='Black', cex=1.5) | |
# Label the line predicting HbA1c from blood glucose | |
text(40, 145, expression("Predict HbA"["1c"]), cex=0.7) | |
# Label the line predicting blood glucose from HbA1c | |
text(21, 220, "Predict Blood Glucose", cex=0.7) | |
# Note that the R_sqr for the two models is identical | |
summary(xy.model) | |
summary(yx.model) | |
############## | |
## Figure 3 ## | |
############## | |
# Plot all 349 data points in light grey and add the errors for a point | |
plot(data$rgb_mmol, data$mmolmol, | |
xlab='Blood Glucose (mmol/L)', xlim=c(0,50), | |
ylab='', | |
ylim=c(0,250), | |
col='Grey', pch=20) | |
# Place the y-axis label nicely | |
mtext(side=2, line=2.3, expression(paste('HbA'['1c'],' (mmol/mol)'))) | |
# Calculate the OLS regression model for hba1c ~ blood glucose | |
xy.model <- lm(data$mmolmol ~ data$rgb_mmol) | |
# Place the line of best fit | |
abline(xy.model, lty=1) | |
# Calculate the OLS regression model for blood glucose ~ hba1c | |
yx.model <- lm(data$rgb_mmol ~ data$mmolmol) | |
# Place the line of best fit | |
lines(c((-10*yx.model$coef[2]+yx.model$coef[1]), (300*yx.model$coef[2]+yx.model$coef[1])),c(-10, 300), lty=2) | |
# Mark the approximate point of intersection of the two lines | |
points(13.2, 80, pch=15, col='Black', cex=1.5) | |
# Label the line predicting HbA1c from blood glucose | |
text(40, 145, expression("Predict HbA"["1c"]), cex=0.7) | |
# Label the line predicting blood glucose from HbA1c | |
text(21, 220, "Predict Blood Glucose", cex=0.7) | |
# Demonstrate residuals y_e | |
points(data$rgb_mmol[348], data$mmolmol[348], pch=20, col='Black') | |
lines(c(data$rgb_mmol[348], (data$mmolmol[348]*yx.model$coef[2] + yx.model$coef[1])), | |
c(data$mmolmol[348], data$mmolmol[348]), | |
lty=3, col='Black', lwd=1) | |
text(32.5, 205, expression("y"[e])) | |
# Demonstrate residuals x_e | |
points(data$rgb_mmol[348], data$mmolmol[348], pch=20, col='Black') | |
lines(c(data$rgb_mmol[348], data$rgb_mmol[348]), | |
c(data$mmolmol[348], data$rgb_mmol[348]*xy.model$coef[2] + xy.model$coef[1]), | |
lty=3, col='Black', lwd=1) | |
text(36, 175, expression("x"[e])) | |
# Note that the R_sqr for the two models is identical | |
summary(xy.model) | |
summary(yx.model) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment