Sharpie/grainAnalysis.R

## grainAnalysis.R
# Load Data
grainData <- read.csv('grainSize.csv', check.names=F, na.strings='--' )

# Calculate Derived Sample Values
grainData[['Phi Diameter']] <- -log2( grainData[['Grain Diameter']] )
totalWeight <- sum( grainData[['Sample Weight']] )
grainData[["Percent Retained"]] <- grainData[['Sample Weight']] / totalWeight
grainData[["Cumulative Percent"]] <- cumsum( grainData[["Percent Retained"]] )
grainData[['Percent Finer']] <- 1 - grainData[['Cumulative Percent']]

# Create Grain Diameter vs. Percent Finer plot

library(ggplot2)

# qplot() is the basic plotting function in ggplot2.  We pass the name
# of the X variable and then the name of the Y variable-- since we are
# also passing the grainData data frame, the X any Y values are interpreted
# to be the names of columns in grainData.  Otherwise the environment would
# be searched for variables matching X and Y. In this case, the names must
# be enclosed by backtick quotes-- " ` " as the column names contain spaces.

diamVFiner <- qplot( `Grain Diameter`, `Percent Finer`, data = grainData,
  xlab = 'Grain Diameter (mm)', ylab = 'Cumulative Weight Passed (%)' )

# qplot() returns an "object" which we have stored as diamVFiner. The real
# power of ggplot2 now comes into play as we wish to modify the plot by
# adding things such as logarithmic and percentage scales.  This is as
# simple as adding objects together:

# By default the object produced by qplot() contains a scatterplot. Lines
# connecting the points may be added using a geom_line() object:

diamVFiner <- diamVFiner + geom_line()

# A scale object may be added to affect the way in which plot axes are
# scaled:

diamVFiner <- diamVFiner + scale_x_log10() + scale_y_continuous(formatter = "percent")

# The final plot is output to the current plotting device by running it through the
# print() function:

dev.new()
print( diamVFiner )

# Notice that ggplot automatically discards data points where one of the
# coordinates is "NA"-- such as the wash weight (the material that passed
# all the sieves).  The wash really doesn't have a well-defined diameter
# so NA is an appropriate value for it.

# If you don't like the grey background, try adding theme_bw():
dev.new()
print( diamVFiner + theme_bw() )


# Add some interpolated data, such as the location of d10, d50 and d90:


diamStats <- with( grainData, approx( `Percent Finer`, log10(`Grain Diameter`),
  xout = c( 0.1, 0.5, 0.9 ), method = 'linear' ))
diamStats <- as.data.frame( diamStats )
names( diamStats ) <- c( 'Percent Finer', 'Grain Diameter' )
diamStats[['label']] <- c( 'd[10]', 'd[50]', 'd[90]' )

# The transformation to log10() and then reverse transformation using
# 10 ^ is done to correct for the effects of round-off error which
# would cause the interpolated points to no longer lie exactly on the
# lines connecting the data after ggplot applies scale_x_log10()
diamStats[['Grain Diameter']] <- 10 ^ diamStats[['Grain Diameter']]

diamVFiner <- diamVFiner + geom_point( data = diamStats, color = 'red' ) +
  geom_text( aes( x = 0.95 * `Grain Diameter`,
    y = 1.005 * `Percent Finer`,
    label = label ), data = diamStats,
    hjust = 1, vjust = 0, parse = TRUE ) +
  theme_bw()

dev.new()
print( diamVFiner )

# For the Cumulative Probability Curve, we plot Cumulative Percent
# versus phi diameter and use a probability scale for the Y axis:
#
# Note:
# scale_y_probit basically runs all the data values through the
# quantile function for the normal distribution, qnorm(), and
# uses the transformed values as plotting coordinates while
# retaining the untransformed values for use as labels.
# Therefore it is equivalent to:
#
#  qplot( `Phi Diameter`, qnorm( grainData[['Cumulative Percent']] ),
#    data = grainData )
#
# Except the above won't have nice percentage-based labels.

cumProbPlot <- qplot( `Phi Diameter`, `Cumulative Percent`, data = grainData,
  xlab = expression(paste("Phi Diameter  ", -log[2](d),)),
  ylab = 'Cumulative Weight Retained (%)' ) +
  geom_line() +
  scale_y_probit( formatter = 'percent' ) +
  theme_bw()

# Add interpolated data, such as the location of phi84, phi75, ect.

phiDiams <- with( grainData, approx( qnorm(`Cumulative Percent`), `Phi Diameter`,
  xout = qnorm(c( 0.05, 0.16, 0.25, 0.50, 0.75, 0.84, 0.95 )), method = 'linear' ))
phiDiams <- as.data.frame( phiDiams )
names( phiDiams ) <- c( 'Cumulative Percent', 'Phi Diameter' )
phiDiams[['label']] <- paste( 'phi[', c(5,16,25,50,75,84,95), ']', sep = '' )

# Again the qnorm() pnorm() business is to eliminate round-off
# error so that the points line up with the lines on the graph.
#
# Question: Which method actually contains the "error":
#
#   - Interpolating Phi Diameter from directly from Cumulative Percent.
#
#   - Interpolating Phi Diameter from qnorm( Cumulative Percent )--
#     what you would be doing if you plotted the points on
#     probability paper and tried to read the diameters off.

phiDiams[['Cumulative Percent']] <- pnorm( phiDiams[['Cumulative Percent']] )

cumProbPlot <- cumProbPlot +
  geom_point( data = phiDiams, color = 'red' ) +
  geom_text( aes(label = label ), data = phiDiams,
    hjust = 1, vjust = 0, parse = TRUE )

dev.new()
print( cumProbPlot )

## grainSize.csv

          
            Sieve Number
            Grain Diameter
            Sample Weight

            
              5/16
              8.000
              24

            
              3 1/2
              5.600
              3

            
              5
              4.000
              19

            
              7
              2.800
              19.6

            
              10
              2
              27.2

            
              18
              1.000
              84.7

            
              35
              0.500
              90.1

            
              45
              0.355
              115.5

            
              60
              0.250
              93.5

            
              wash
              --
              19.3
	# Load Data
	grainData <- read.csv('grainSize.csv', check.names=F, na.strings='--' )

	# Calculate Derived Sample Values
	grainData[['Phi Diameter']] <- -log2( grainData[['Grain Diameter']] )
	totalWeight <- sum( grainData[['Sample Weight']] )
	grainData[["Percent Retained"]] <- grainData[['Sample Weight']] / totalWeight
	grainData[["Cumulative Percent"]] <- cumsum( grainData[["Percent Retained"]] )
	grainData[['Percent Finer']] <- 1 - grainData[['Cumulative Percent']]

	# Create Grain Diameter vs. Percent Finer plot

	library(ggplot2)

	# qplot() is the basic plotting function in ggplot2. We pass the name
	# of the X variable and then the name of the Y variable-- since we are
	# also passing the grainData data frame, the X any Y values are interpreted
	# to be the names of columns in grainData. Otherwise the environment would
	# be searched for variables matching X and Y. In this case, the names must
	# be enclosed by backtick quotes-- " ` " as the column names contain spaces.

	diamVFiner <- qplot( `Grain Diameter`, `Percent Finer`, data = grainData,
	xlab = 'Grain Diameter (mm)', ylab = 'Cumulative Weight Passed (%)' )

	# qplot() returns an "object" which we have stored as diamVFiner. The real
	# power of ggplot2 now comes into play as we wish to modify the plot by
	# adding things such as logarithmic and percentage scales. This is as
	# simple as adding objects together:

	# By default the object produced by qplot() contains a scatterplot. Lines
	# connecting the points may be added using a geom_line() object:

	diamVFiner <- diamVFiner + geom_line()

	# A scale object may be added to affect the way in which plot axes are
	# scaled:

	diamVFiner <- diamVFiner + scale_x_log10() + scale_y_continuous(formatter = "percent")

	# The final plot is output to the current plotting device by running it through the
	# print() function:

	dev.new()
	print( diamVFiner )

	# Notice that ggplot automatically discards data points where one of the
	# coordinates is "NA"-- such as the wash weight (the material that passed
	# all the sieves). The wash really doesn't have a well-defined diameter
	# so NA is an appropriate value for it.

	# If you don't like the grey background, try adding theme_bw():
	dev.new()
	print( diamVFiner + theme_bw() )


	# Add some interpolated data, such as the location of d10, d50 and d90:


	diamStats <- with( grainData, approx( `Percent Finer`, log10(`Grain Diameter`),
	xout = c( 0.1, 0.5, 0.9 ), method = 'linear' ))
	diamStats <- as.data.frame( diamStats )
	names( diamStats ) <- c( 'Percent Finer', 'Grain Diameter' )
	diamStats[['label']] <- c( 'd[10]', 'd[50]', 'd[90]' )

	# The transformation to log10() and then reverse transformation using
	# 10 ^ is done to correct for the effects of round-off error which
	# would cause the interpolated points to no longer lie exactly on the
	# lines connecting the data after ggplot applies scale_x_log10()
	diamStats[['Grain Diameter']] <- 10 ^ diamStats[['Grain Diameter']]

	diamVFiner <- diamVFiner + geom_point( data = diamStats, color = 'red' ) +
	geom_text( aes( x = 0.95 * `Grain Diameter`,
	y = 1.005 * `Percent Finer`,
	label = label ), data = diamStats,
	hjust = 1, vjust = 0, parse = TRUE ) +
	theme_bw()

	dev.new()
	print( diamVFiner )

	# For the Cumulative Probability Curve, we plot Cumulative Percent
	# versus phi diameter and use a probability scale for the Y axis:
	#
	# Note:
	# scale_y_probit basically runs all the data values through the
	# quantile function for the normal distribution, qnorm(), and
	# uses the transformed values as plotting coordinates while
	# retaining the untransformed values for use as labels.
	# Therefore it is equivalent to:
	#
	# qplot( `Phi Diameter`, qnorm( grainData[['Cumulative Percent']] ),
	# data = grainData )
	#
	# Except the above won't have nice percentage-based labels.

	cumProbPlot <- qplot( `Phi Diameter`, `Cumulative Percent`, data = grainData,
	xlab = expression(paste("Phi Diameter ", -log[2](d),)),
	ylab = 'Cumulative Weight Retained (%)' ) +
	geom_line() +
	scale_y_probit( formatter = 'percent' ) +
	theme_bw()

	# Add interpolated data, such as the location of phi84, phi75, ect.

	phiDiams <- with( grainData, approx( qnorm(`Cumulative Percent`), `Phi Diameter`,
	xout = qnorm(c( 0.05, 0.16, 0.25, 0.50, 0.75, 0.84, 0.95 )), method = 'linear' ))
	phiDiams <- as.data.frame( phiDiams )
	names( phiDiams ) <- c( 'Cumulative Percent', 'Phi Diameter' )
	phiDiams[['label']] <- paste( 'phi[', c(5,16,25,50,75,84,95), ']', sep = '' )

	# Again the qnorm() pnorm() business is to eliminate round-off
	# error so that the points line up with the lines on the graph.
	#
	# Question: Which method actually contains the "error":
	#
	# - Interpolating Phi Diameter from directly from Cumulative Percent.
	#
	# - Interpolating Phi Diameter from qnorm( Cumulative Percent )--
	# what you would be doing if you plotted the points on
	# probability paper and tried to read the diameters off.

	phiDiams[['Cumulative Percent']] <- pnorm( phiDiams[['Cumulative Percent']] )

	cumProbPlot <- cumProbPlot +
	geom_point( data = phiDiams, color = 'red' ) +
	geom_text( aes(label = label ), data = phiDiams,
	hjust = 1, vjust = 0, parse = TRUE )

	dev.new()
	print( cumProbPlot )
Sieve Number	Grain Diameter	Sample Weight
5/16	8.000	24
3 1/2	5.600	3
5	4.000	19
7	2.800	19.6
10	2	27.2
18	1.000	84.7
35	0.500	90.1
45	0.355	115.5
60	0.250	93.5
wash	--	19.3