# Tom Hopper tomhopper

## xmrplot.R
library(qcc)
#' The data, from a sample published by Donald Wheeler.
my.xmr.raw <- c(
  5045, 4350, 4350, 3975, 4290, 4430, 4485, 4285,
  3980, 3925, 3645, 3760, 3300, 3685, 3463, 5200
)
#' Create the individuals chart and qcc object.
my.xmr.x <- qcc(my.xmr.raw, type = "xbar.one", plot = TRUE)
#' Create the moving range chart and qcc object. qcc takes a two-column matrix
#' that is used to calculate the moving range: column 1 holds every
#' observation except the last, column 2 every observation except the first,
#' so each row pairs a value with its successor.
#' head(x, -1) / tail(x, -1) state that intent directly. The earlier index
#' 1:length(x)-1 parsed as (1:n) - 1, i.e. 0:(n - 1), and only produced the
#' right elements because R silently ignores a zero index.
my.xmr.raw.r <- cbind(head(my.xmr.raw, -1), tail(my.xmr.raw, -1))
my.xmr.mr <- qcc(my.xmr.raw.r, type = "R", plot = TRUE)

## ggplot_density_plot.r
# Overlay the standard normal density (solid) and a t density with 20 degrees
# of freedom (dashed) on one plot, with a colour legend naming each curve.
# ggplot2 must be attached before ggplot() is called; this snippet previously
# relied on some other code having loaded it.
library(ggplot2)
ggplot(NULL, aes(x = x, colour = distribution)) +
  # Each layer gets its own small data frame so that `distribution` takes a
  # distinct factor value per curve, which drives the colour legend.
  stat_function(
    fun = dnorm,
    data = data.frame(x = c(-6, 6), distribution = factor(1)),
    size = 1
  ) +
  stat_function(
    fun = dt,
    args = list(df = 20),
    data = data.frame(x = c(-6, 6), distribution = factor(2)),
    linetype = "dashed",
    size = 1
  ) +
  scale_colour_manual(
    values = c("blue", "red"),
    labels = c("Normal", "T-Distribution")
  ) +
  theme(
    text = element_text(size = 12),
    legend.position = c(0.85, 0.75)
  ) +
  xlim(-4, 4) +
  xlab(NULL) +
  ylab(NULL)

## facet_labelling.R
#' Data frame column names are rarely human-readable, concise and clear, but are usually meaningful. Rather
#' than trying to modify the data, we can provide custom labels for facets.
library(data.table)
library(lubridate)
library(reshape2)
library(ggplot2)

#' Download raw data from "Weather Data" at \link{http://datamonitoring.marec.gvsu.edu/DataDownload.aspx},
#' rename the file to "Marec_weather.csv" and save it to /data/ in the current working directory.

## ggplot2_xkcd_Humor_Sans.R
# The xkcd font used by the package xkcd (which provides a theme for ggplot2)
# is missing many characters and some characters don't seem to display correctly.
# An alternate xkcd-style font is Humor Sans, available free from
# \url{http://antiyawn.com/uploads/humorsans.html}
# The code below forces the use of Humor Sans instead of xkcd.
# The xkcd and ggplot2 packages are available from CRAN.

library(ggplot2)
library(xkcd)

## random_word_vector.R
# Create 2 replicates of 5 "words" generated from random characters.
# Each word has 5 + Poisson(lambda = 3) characters (at least 5, typically
# 5 - 15), drawn from `letters` without replacement, so no letter repeats
# within a word.
# Exactly one length is drawn per word: sample() expects a single size, so
# the earlier rpois(5000, ...) generated thousands of values that were never
# used. rpois() already returns whole numbers, so no rounding is needed.
rep(
  replicate(
    5,
    paste(
      sample(letters, rpois(1, lambda = 3) + 5, replace = FALSE),
      collapse = ""
    )
  ),
  2
)

# Sample output:
# [1] "rfexnwyjst" "vwtadhjnly" "ztfgvldo"   "tmerol"     "mcqhosap"   "rfexnwyjst" "vwtadhjnly" "ztfgvldo"   "tmerol"
#[10] "mcqhosap"

## median_hourly_earnings.R
# From Conrad Hackett
# Median hourly earnings
# \url{https://twitter.com/conradhackett/status/748884076493475840}
# makeover: convert from two groups of side-by-side vertical bar charts to a more readable dot plot
# Demonstrates:
#   Use of in ggplot2
#   Creating dot plots
#   Combining color and shape in a single legend
#   Sorting a dataframe so that categorical data in one column is ordered by a second numerical column
# Note: resulting graph displays best at about 450 pixels x 150 pixels

## addNewData.R
##' Modifies 'data' by adding new values supplied in newDataFileName
##'
##' newDataFileName is expected to have columns
##' c(lookupVariable,lookupValue,newVariable,newValue,source)
##'
##' Within the column 'newVariable', replace values that
##' match 'lookupValue' within column 'lookupVariable' with the value
##' 'newValue'.  If 'lookupVariable' is NA, then replace *all* elements
##' of 'newVariable' with the value 'newValue'.
##'

## align_common_baseline.R
# Response to a post at Storytelling with Data:
# \url{http://www.storytellingwithdata.com/blog/orytellingwithdata.com/2015/07/align-against-common-baseline.html}
# Demonstrates
#  * Cleveland-style dot plots (improvement over pie and bar charts)
#  * Sorting categorical data by a numerical variable with more than one grouping variable
#  * Highlighting differences between groups graphically

library(ggplot2)
library(scales)

## dt_merge_nodups.R
library(data.table)

# See \link{http://stackoverflow.com/questions/11792527/filtering-out-duplicated-non-unique-rows-in-data-table}
# for a discussion of how to eliminate duplicate rows.
# The problem is that the \code{unique()} function will use a key, if it exists. We need to
# eliminate the key.

# Build two single-column data.tables, each holding 15 lowercase letters
# drawn at random without replacement (sample()'s default).
temp1 <- data.table(sample(x = letters, size = 15))
temp2 <- data.table(sample(x = letters, size = 15))

## strip_na_rows.R
#' Remove rows from data frame containing only NA in pipe-friendly manner
#' @description Accepts a data frame and strips out any rows
#'       containing only \code{NA} values, then returns the resulting data frame.
#' @param the_df A data frame
#' @return A data frame holding the rows of \code{the_df} that contain at
#'       least one non-\code{NA} value
#' @source \url{http://stackoverflow.com/a/6437778}
strip_na_rows <- function(the_df) {
	# A row is all-NA exactly when its NA count equals the number of columns.
	keep <- rowSums(is.na(the_df)) != ncol(the_df)
	# Return the filtered rows; previously the subset was computed, discarded,
	# and the input returned unchanged. drop = FALSE keeps a one-column data
	# frame from collapsing into a bare vector.
	the_df[keep, , drop = FALSE]
}
	library(qcc)
	#' The data, from a sample published by Donald Wheeler.
	my.xmr.raw <- c(
		5045, 4350, 4350, 3975, 4290, 4430, 4485, 4285,
		3980, 3925, 3645, 3760, 3300, 3685, 3463, 5200
	)
	#' Create the individuals chart and qcc object.
	my.xmr.x <- qcc(my.xmr.raw, type = "xbar.one", plot = TRUE)
	#' Create the moving range chart and qcc object. qcc takes a two-column matrix
	#' that is used to calculate the moving range: column 1 holds every
	#' observation except the last, column 2 every observation except the first,
	#' so each row pairs a value with its successor.
	#' head(x, -1) / tail(x, -1) state that intent directly. The earlier index
	#' 1:length(x)-1 parsed as (1:n) - 1, i.e. 0:(n - 1), and only produced the
	#' right elements because R silently ignores a zero index.
	my.xmr.raw.r <- cbind(head(my.xmr.raw, -1), tail(my.xmr.raw, -1))
	my.xmr.mr <- qcc(my.xmr.raw.r, type = "R", plot = TRUE)
	# Overlay the standard normal density (solid) and a t density with 20
	# degrees of freedom (dashed), coloured and labelled via a manual scale.
	ggplot(NULL, aes(x = x, colour = distribution)) +
		stat_function(
			fun = dnorm,
			data = data.frame(x = c(-6, 6), distribution = factor(1)),
			size = 1
		) +
		stat_function(
			fun = dt,
			args = list(df = 20),
			data = data.frame(x = c(-6, 6), distribution = factor(2)),
			linetype = "dashed",
			size = 1
		) +
		scale_colour_manual(
			values = c("blue", "red"),
			labels = c("Normal", "T-Distribution")
		) +
		theme(
			text = element_text(size = 12),
			legend.position = c(0.85, 0.75)
		) +
		xlim(-4, 4) +
		# Suppress both axis titles in a single call.
		labs(x = NULL, y = NULL)
	#' Data frame column names are rarely human-readable, concise and clear, but are usually meaningful. Rather
	#' than trying to modify the data, we can provide custom labels for facets.
	library(data.table)
	library(lubridate)
	library(reshape2)
	library(ggplot2)

	#' Download raw data from "Weather Data" at \link{http://datamonitoring.marec.gvsu.edu/DataDownload.aspx},
	#' rename the file to "Marec_weather.csv" and save it to /data/ in the current working directory.
	# The xkcd font used by the package xkcd (which provides a theme for ggplot2)
	# is missing many characters and some characters don't seem to display correctly.
	# An alternate xkcd-style font is Humor Sans, available free from
	# \url{http://antiyawn.com/uploads/humorsans.html}
	# The code below forces the use of Humor Sans instead of xkcd.
	# The xkcd and ggplot2 packages are available from CRAN.

	library(ggplot2)
	library(xkcd)
	# Create 2 replicates of 5 "words" generated from random characters.
	# Each word has 5 + Poisson(lambda = 3) characters (at least 5, typically
	# 5 - 15), drawn from `letters` without replacement, so no letter repeats
	# within a word.
	# Exactly one length is drawn per word: sample() expects a single size, so
	# the earlier rpois(5000, ...) generated thousands of values that were never
	# used. rpois() already returns whole numbers, so no rounding is needed.
	rep(
		replicate(
			5,
			paste(
				sample(letters, rpois(1, lambda = 3) + 5, replace = FALSE),
				collapse = ""
			)
		),
		2
	)

	# Sample output:
	# [1] "rfexnwyjst" "vwtadhjnly" "ztfgvldo" "tmerol" "mcqhosap" "rfexnwyjst" "vwtadhjnly" "ztfgvldo" "tmerol"
	#[10] "mcqhosap"
	# From Conrad Hackett
	# Median hourly earnings
	# \url{https://twitter.com/conradhackett/status/748884076493475840}
	# makeover: convert from two groups of side-by-side vertical bar charts to a more readable dot plot
	# Demonstrates:
	# Use of in ggplot2
	# Creating dot plots
	# Combining color and shape in a single legend
	# Sorting a dataframe so that categorical data in one column is ordered by a second numerical column
	# Note: resulting graph displays best at about 450 pixels x 150 pixels
	##' Modifies 'data' by adding new values supplied in newDataFileName
	##'
	##' newDataFileName is expected to have columns
	##' c(lookupVariable,lookupValue,newVariable,newValue,source)
	##'
	##' Within the column 'newVariable', replace values that
	##' match 'lookupValue' within column 'lookupVariable' with the value
	##' 'newValue'. If 'lookupVariable' is NA, then replace all elements
	##' of 'newVariable' with the value 'newValue'.
	##'
	# Response to a post at Storytelling with Data:
	# \url{http://www.storytellingwithdata.com/blog/orytellingwithdata.com/2015/07/align-against-common-baseline.html}
	# Demonstrates
	# * Cleveland-style dot plots (improvement over pie and bar charts)
	# * Sorting categorical data by a numerical variable with more than one grouping variable
	# * Highlighting differences between groups graphically

	library(ggplot2)
	library(scales)
	library(data.table)

	# See \link{http://stackoverflow.com/questions/11792527/filtering-out-duplicated-non-unique-rows-in-data-table}
	# for a discussion of how to eliminate duplicate rows.
	# The problem is that the \code{unique()} function will use a key, if it exists. We need to
	# eliminate the key.

	# Build two single-column data.tables, each holding 15 lowercase letters
	# drawn at random without replacement (sample()'s default).
	temp1 <- data.table(sample(x = letters, size = 15))
	temp2 <- data.table(sample(x = letters, size = 15))
	#' Remove rows from data frame containing only NA in pipe-friendly manner
	#' @description Accepts a data frame and strips out any rows
	#' containing only \code{NA} values, then returns the resulting data frame.
	#' @param the_df A data frame
	#' @return A data frame holding the rows of \code{the_df} that contain at
	#' least one non-\code{NA} value
	#' @source \url{http://stackoverflow.com/a/6437778}
	strip_na_rows <- function(the_df) {
		# A row is all-NA exactly when its NA count equals the number of columns.
		keep <- rowSums(is.na(the_df)) != ncol(the_df)
		# Return the filtered rows; previously the subset was computed, discarded,
		# and the input returned unchanged. drop = FALSE keeps a one-column data
		# frame from collapsing into a bare vector.
		the_df[keep, , drop = FALSE]
	}