Daniel D Reidpath dreidpath

## luhnTest.R
luhnTest <- function(cc){
  # cc: the number to test
  # Reverse the digits, convert to numeric vector; see: https://en.wikipedia.org/wiki/Luhn_algorithm
  # The code originally appeared in http://megasnippets.com/source-codes/r/luhn_test_credit_card_numbers

  cc2 <- as.numeric(unlist(strsplit(as.character(cc), "")))[nchar(cc):1]

  s1 <- 0
  s2 <- 0


## flatland-randomwalk.R
# A random-walk in flat-land.  Note that the world is a circle.

maxworld <- 1000  # Flatland is maxworld units long
p <- integer(maxworld/2)  # place p on the unit halfway between 0 and maxworld

dist <- c()  # dist: the distribution of "times it took to complete each iter(ation) of the randmo walk"

for(loop in 1:1000){  # Run the siumulation of random walks 1000 times
  world <- rep(0, maxworld)  # Set up the world, as maxworld-long unvisited units
  keep_going <- T  # keep_going determined whether a random walk simulation continues

## xmas-tree.R
# This appeared on R-blogger, a post by "wingfeet" from 23 December 2013: http://www.r-bloggers.com/merry-christmas-2/
# I stumbled acroos it and didn't want to lose it so I placed it here
part <- list(x0=0,y0=0,x1=0,y1=1,
             branch1=NULL,branch2=NULL,extend=NULL,
             lwd=1,depth=0,col='springgreen')

par(mfrow=c(1,1),mar=c(5, 4, 4, 2) + 0.1)
segplot <- function(tree) {
  if (is.null(tree)) return()
  segments(tree$x0,tree$y0,tree$x1,tree$y1,

## uk-pop-variation.R
# Data: Percentage variation within age group between the 1993 and 2016 UK populations
# The data original data from which the numbers were derived come from:
# https://www.census.gov/population/international/data/idb/region.php?N=%20Results%20&T=10&A=separate&RT=0&Y=1993,2016&R=-1&C=UK
variation <- c(-10.9589041096, -13.5802469136, -12.987012987, -8.8235294118, -4.5454545455,
               5.7971014493, 35.1851851852, 25.4901960784, 10.2040816327, 17.0212765957, 0,
               6.6666666667, 24.9855625305)

age_grp <- c("20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80+")


## MY-stroke-registry2010_2014.R
# A Plot of age adjusted ischemic stroke annual incidence (per 100,000) in Malaysia.
# The data come from a paper by Aziz et al. (2015) published in the Journal of Stroke &
# Cerebrovascular Disease.  For a discussion see: http://wp.me/p6R8xa-Q

year <- 2010:2014
rate <- c(44.94, 49.6, 72.41, 81.94, 126.25)
model1 <- lm(rate ~ year + I(year^2))

plot(year, rate,
     xlab="Year",

## NRIC.R
#####################################################################
# A series of functions to manage Malaysian National Registration   #
# Identity Card Number (NRIC) data.  The NRIC was a form            #
# YYMMDD-PB-XXXG[*], where YYMMDD are six digits representing the   #
# date of birth/registration, PB is a two digit code representing   #
# the place of birth XXX is a random digit and G is also a random   #
# digit, the if G is even the NRIC is for a female and if it is odd #
# it is for a male.                                                 #
#-------------------------------------------------------------------#
# The functions:                                                    #

## gist:6ca24e5006fa165f494a
require("devtools")
#  Import a funrction that will take an R lm model expressed in terms of y ~ x. Provided values for y, return "estimated" x.
source_gist("https://gist.github.com/dreidpath/6ee2bb95c6010cff3874")

x <- 0:5  # Comment out this line on the second runs, and uncomment line 6
# x <- c(1, 0, 2:5)  # Run this one second, and comment out line 5
cat("Mean of x:", mean(x))
cat("Variance of x:", var(x))

y <- 5:0

## returnXinY.R
returnXinY <- function(lm_model, y_values){
  # This function takes a lm model expressed in terms of y ~ x, and values for y, and returns x
  # Given a model of the form y ~ beta.x + alpha, then given values for y, return x = (y-alpha)/beta
  # Crazy I know, but there you have it.
  #
  if(class(lm_model) != "lm"){
    stop("This function requires a lm mode with a single predictor variable (y ~x)")
  }
  if(length(unlist(strsplit(toString(lm_model$call[2][[1]]), " "))) > 3){
    stop("This function only works with a lm model with a single predictor variable (y ~x)")

## hba1c-rbg.R
##########################################################################################################
#  Analysis of Blood glucose / HbA1C data to show that y ~ x is not the same as x ~ y.                   #
#  Author: Daniel D Reidpath, Pascale Allotey, & Mark R Diamond                                          #
#  Date: 18 October 2015                                                                                 #
##########################################################################################################
# This analysis supports a research note (doi: 10.4225/03/56239BD13C5E0)
#
#
# Load the data.  These are derived from Daramola (2012) and held on the CMU StatLib---Datasets Archive
data <- read.csv('http://lib.stat.cmu.edu/datasets/hba1c_bloodGlucose.dat', header=T, sep='\t', skip=30)

## mergeall_function.R
#############################################################################
# Two functions to merge a series of dataframes. This is needed if you want #
# to create a single "flat file" from multiple dataframes.                  #
#############################################################################

# Create an infix function %<<% that will merge two data frames.
# Note Bene:
# the dataframes to merge must have a shared column with the matched unique
# id named "id".  You can vary this name by altering the code on line 12.
#
	luhnTest <- function(cc){
	# cc: the number to test
	# Reverse the digits, convert to numeric vector; see: https://en.wikipedia.org/wiki/Luhn_algorithm
	# The code originally appeared in http://megasnippets.com/source-codes/r/luhn_test_credit_card_numbers

	cc2 <- as.numeric(unlist(strsplit(as.character(cc), "")))[nchar(cc):1]

	s1 <- 0
	s2 <- 0
	# A random-walk in flat-land. Note that the world is a circle.

	maxworld <- 1000 # Flatland is maxworld units long
	p <- integer(maxworld/2) # place p on the unit halfway between 0 and maxworld

	dist <- c() # dist: the distribution of "times it took to complete each iter(ation) of the randmo walk"

	for(loop in 1:1000){ # Run the siumulation of random walks 1000 times
	world <- rep(0, maxworld) # Set up the world, as maxworld-long unvisited units
	keep_going <- T # keep_going determined whether a random walk simulation continues
	# This appeared on R-blogger, a post by "wingfeet" from 23 December 2013: http://www.r-bloggers.com/merry-christmas-2/
	# I stumbled acroos it and didn't want to lose it so I placed it here
	part <- list(x0=0,y0=0,x1=0,y1=1,
	branch1=NULL,branch2=NULL,extend=NULL,
	lwd=1,depth=0,col='springgreen')

	par(mfrow=c(1,1),mar=c(5, 4, 4, 2) + 0.1)
	segplot <- function(tree) {
	if (is.null(tree)) return()
	segments(tree$x0,tree$y0,tree$x1,tree$y1,
	# Data: Percentage variation within age group between the 1993 and 2016 UK populations
	# The data original data from which the numbers were derived come from:
	# https://www.census.gov/population/international/data/idb/region.php?N=%20Results%20&T=10&A=separate&RT=0&Y=1993,2016&R=-1&C=UK
	variation <- c(-10.9589041096, -13.5802469136, -12.987012987, -8.8235294118, -4.5454545455,
	5.7971014493, 35.1851851852, 25.4901960784, 10.2040816327, 17.0212765957, 0,
	6.6666666667, 24.9855625305)

	age_grp <- c("20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80+")
	# A Plot of age adjusted ischemic stroke annual incidence (per 100,000) in Malaysia.
	# The data come from a paper by Aziz et al. (2015) published in the Journal of Stroke &
	# Cerebrovascular Disease. For a discussion see: http://wp.me/p6R8xa-Q

	year <- 2010:2014
	rate <- c(44.94, 49.6, 72.41, 81.94, 126.25)
	model1 <- lm(rate ~ year + I(year^2))

	plot(year, rate,
	xlab="Year",
	#####################################################################
	# A series of functions to manage Malaysian National Registration #
	# Identity Card Number (NRIC) data. The NRIC was a form #
	# YYMMDD-PB-XXXG[*], where YYMMDD are six digits representing the #
	# date of birth/registration, PB is a two digit code representing #
	# the place of birth XXX is a random digit and G is also a random #
	# digit, the if G is even the NRIC is for a female and if it is odd #
	# it is for a male. #
	#-------------------------------------------------------------------#
	# The functions: #
	require("devtools")
	# Import a funrction that will take an R lm model expressed in terms of y ~ x. Provided values for y, return "estimated" x.
	source_gist("https://gist.github.com/dreidpath/6ee2bb95c6010cff3874")

	x <- 0:5 # Comment out this line on the second runs, and uncomment line 6
	# x <- c(1, 0, 2:5) # Run this one second, and comment out line 5
	cat("Mean of x:", mean(x))
	cat("Variance of x:", var(x))

	y <- 5:0
	returnXinY <- function(lm_model, y_values){
	# This function takes a lm model expressed in terms of y ~ x, and values for y, and returns x
	# Given a model of the form y ~ beta.x + alpha, then given values for y, return x = (y-alpha)/beta
	# Crazy I know, but there you have it.
	#
	if(class(lm_model) != "lm"){
	stop("This function requires a lm mode with a single predictor variable (y ~x)")
	}
	if(length(unlist(strsplit(toString(lm_model$call[2][[1]]), " "))) > 3){
	stop("This function only works with a lm model with a single predictor variable (y ~x)")
	##########################################################################################################
	# Analysis of Blood glucose / HbA1C data to show that y ~ x is not the same as x ~ y. #
	# Author: Daniel D Reidpath, Pascale Allotey, & Mark R Diamond #
	# Date: 18 October 2015 #
	##########################################################################################################
	# This analysis supports a research note (doi: 10.4225/03/56239BD13C5E0)
	#
	#
	# Load the data. These are derived from Daramola (2012) and held on the CMU StatLib---Datasets Archive
	data <- read.csv('http://lib.stat.cmu.edu/datasets/hba1c_bloodGlucose.dat', header=T, sep='\t', skip=30)
	#############################################################################
	# Two functions to merge a series of dataframes. This is needed if you want #
	# to create a single "flat file" from multiple dataframes. #
	#############################################################################

	# Create an infix function %<<% that will merge two data frames.
	# Note Bene:
	# the dataframes to merge must have a shared column with the matched unique
	# id named "id". You can vary this name by altering the code on line 12.
	#