Skip to content

Instantly share code, notes, and snippets.

@aaiezza
Created June 27, 2017 14:15
Show Gist options
  • Save aaiezza/1df71151875536de051a1b9628c5cb1c to your computer and use it in GitHub Desktop.
Save aaiezza/1df71151875536de051a1b9628c5cb1c to your computer and use it in GitHub Desktop.
ggplot2 plotting function for a Normal Distribution with lightly shaded and labeled standard deviations from the mean. User may also give lower and upper bounds, which adds shading under the curve between those points and a label with the area under the curve. (Made to accommodate for Mathematical Statistics with Applications 7th Ed. - WMS 2008)
##
# Standard function for printing Normal Distribution in ggplot2
# # # #
plotNormCurve <- function( mean = 0, sd = 1, variance = sd ^ 2,
lowerBound = NULL, upperBound = NULL,
sdRange = 3.5,
from = mean - (sdRange * sd),
to = mean + (sdRange * sd),
plotTitle = 'Normal Curve', xlab = 'x', ylab = 'density'
)
{
sd <- sqrt( variance )
# This calculation protects us from an altered x-range
lowerSD <- (mean - from) / sd
upperSD <- ( to - mean) / sd
normPlot <- ggplot() + aes( x = c( from, to ) ) +
####
# Figuring out what the x scale should be
scale_x_continuous( breaks = function(li)
{
br <- seq( ceiling(li[1]), li[2], ifelse( log(sd) >= 0, 1, abs(log(-log(sd))) ) )
if ( length( br ) > 30 || log(sd) < 0 )
{
numOfBreaks <- ceiling( min( sdRange + 6, 10 ) )
# cat( sprintf( 'numOfBreaks = %d\n', numOfBreaks ) )
ltmean <- seq( li[1], mean, length.out = numOfBreaks%/%2 )
# cat( sprintf( 'lb = %f, ltmean = %s\n', li[1], paste( ltmean, collapse = ', ' ) ) )
gtmean <- seq( mean, li[2], length.out = numOfBreaks%/%2 )
# cat( sprintf( 'ub = %f, gtmean = %s\n', li[2], paste( gtmean, collapse = ', ' ) ) )
br <- c( ltmean[-length(ltmean)],
mean,
gtmean[-1] )
}
roundDigits <- ceiling(max(2,ifelse( sd == 1 || log(sd) >= 0, 0, abs(log(-log(sd)))*2 ) ) )
# cat( sprintf( 'roundDigits = %d\n', roundDigits ) )
return( round( br, roundDigits ) )
} ) +
####
# Title and axis titles
labs( title = plotTitle, x = xlab, y = ylab ) +
theme_bw() +
theme( plot.title = element_text( hjust = 0.5 ),
panel.grid.major = element_line( size = 1.0 ),
axis.ticks.length = unit( 5, 'pt' ),
plot.margin = unit( c( 2, 2, 2, 2 ), 'lines'))#,
#aspect.ratio = 1/φ )
####
# Shade every standard deviations from the mean
lowerSDOut <- -floor( lowerSD )
upperSDOut <- floor( upperSD )
sdLi <- mean + sd * lowerSDOut:upperSDOut
sdL <- data.frame(
x = sdLi,
xend = sdLi,
y = 0,
yend = dnorm(sdLi,mean,sd)
)
shadeBounds <- rbind( c( from, sdL$x ), c( sdL$x, to ) )
## TODO don't assume symmetry! Numbers should be based on area's distance from the mean!
sdLi <- c( 1:ceiling(ncol(shadeBounds)/2),
(ceiling(ncol(shadeBounds)/2) -
ifelse( ncol(shadeBounds)%%2 == 1, 1, 0 ) ):1 )
shadeBounds <- rbind( shadeBounds, sdLi )
rownames( shadeBounds ) <- NULL
shades <- apply( shadeBounds, 2, function( x, i )
{
areax <- seq( x[1], x[2], length.out = 100 )
data.frame( x = areax, ymin = 0, ymax = dnorm( areax, mean, sd ), id = x[3] )
} )
for ( shade in shades )
{
normPlot <- normPlot + geom_ribbon( data = shade, aes(
x = x, ymin = ymin, ymax = ymax, alpha = factor(id) ),
fill = 'lightgrey', color = NA,
show.legend = FALSE )
}
#####
normPlot <- normPlot + scale_alpha_manual( name = NULL,
breaks = levels( factor( sdLi ) ),
values = ( as.double( levels( factor( sdLi ) ) ) * 0.185 )^2 )
#####
normPlot <- normPlot + geom_segment( data = sdL, aes(
x = x, y = y, xend = xend, yend = yend ),
linetype = 'dotted', size = 0.3 )
area <- list()
####
# User-defined shading under the curve
if ( !missing( lowerBound ) && !missing( upperBound ) )
{
for ( i in 1:length(lowerBound) )
{
xmin <- max( lowerBound[i], from )
xmax <- min( upperBound[i], to )
areax <- seq( xmin, xmax, length.out = 100 )
area[[i]] <- data.frame(
x = areax,
ymin = 0,
ymax = dnorm( areax, mean = mean, sd = sd ) )
getLabel <- function( x )
{
sapply( x, function( j )
{
if ( lowerBound[i] < xmin && j == xmin )
return( lowerBound[i] )
else if ( upperBound[i] > xmax && j == xmax)
return( upperBound[i] )
else
return( j )
} )
}
# User-defined shading and dashed lines at cutoffs
normPlot <- normPlot + geom_ribbon( data = area[[i]], aes(
x = x, ymin = ymin, ymax = ymax ),
fill = 'lightblue', alpha = 0.7 ) +
geom_segment( data = area[[i]][c(1,nrow(area[[i]])),],
aes( x = x, y = ymin, xend = x, yend = ymax ),
linetype = '42', size = 0.4 )
}
}
####
# The actual normal Curve
normPlot <- normPlot + stat_function( fun = dnorm,
args = list( mean = mean, sd = sd ) ) +
####
# Dashed line for the average
geom_segment( aes(
x = mean, y = 0, xend = mean, yend = dnorm(mean,mean,sd) ),
linetype = '82', size = 0.55 )
####
# Label every standard deviations from the mean
normPlot <- normPlot + geom_label( data = data.frame(
x = sdL$x, y = sdL$yend ),
aes( x = x, y = y, label = x ), label.padding = unit(0.15, "lines"),
vjust = 0.0, size = 2.6, alpha = 0.8, hjust = 'outward',
label.size = 0.25 )
if ( !missing( lowerBound ) && !missing( upperBound ) )
{
for ( i in 1:length(lowerBound) )
{
# The labels on the User-defined bounds for shaded region
normPlot <- normPlot + geom_label_repel(
data = area[[i]][c(1,nrow(area[[i]])),],
aes( x = x, y = ymax*7/8, label = getLabel( x ) ),
fontface = 'italic', color = 'black', fill = 'lightgrey',
show.legend = FALSE,
size = 2.6, force = 0,
label.size = 0.25,
segment.size = 0.3,
arrow = arrow(25, unit(0.01,'npc')),
box.padding = unit( 0.02, 'npc' ),
point.padding = unit( 0.6, 'lines' ),
nudge_x = 0.1
)
# The label of probability under the curve
pLabel.x <- meanAB( mean, sd, lowerBound[i], upperBound[i] )
pLabel.y <- dnorm( pLabel.x, mean, sd )
normPlot <- normPlot + geom_label_repel(
data = data.frame(
x = pLabel.x,
y = pLabel.y / 2,
label = sprintf( "P = %.4f\n( %.3f )",
pnormAB( mean, sd, lowerBound[i], upperBound[i] ), pLabel.x ) ),
aes( x = x, y = y, label = label ),
fontface = 'italic', color = 'black', fill = 'lightgrey',
show.legend = FALSE,
size = 2.5, force = 2,
label.size = 0.25,
segment.size = 0.3,
arrow = arrow(25, unit(0.01,'npc')),
box.padding = unit( 0.02, 'npc' ),
point.padding = unit( 0.6, 'lines' )
)
}
}
return( normPlot )
}
##
# Calculates the Expected value of a Truncated Normal Distribution
# # # #
expectedValueOfNormalXbetweenAandB <- meanAB <- function( mean, sd, a, b )
{
µ <- mean
σ <- sd
µ + ( σ *
( ( exp( -(((a-µ)^2)/(2*(σ^2))) ) - exp( -(((b-µ)^2)/(2*(σ^2))) ) ) /
( sqrt(pi/2) *
( erf( (b-µ) / (sqrt(2)*σ) )
- erf( (a-µ) / (sqrt(2)*σ) )
)
)
)
)
}
##
# Calculates area under a normal curve from lowerBound to upperBound
# # # #
pnormAB <- function( mean, sd, lowerBound, upperBound )
{
return(
pnorm( upperBound, mean, sd ) -
pnorm( lowerBound, mean, sd ) )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment