aaiezza/plotNorm.R

## plotNorm.R
##
# Standard function for printing Normal Distribution in ggplot2
# # # #
plotNormCurve <- function(  mean = 0, sd = 1, variance = sd ^ 2,
    lowerBound = NULL, upperBound = NULL,
    sdRange = 3.5,
    from = mean - (sdRange * sd),
    to = mean + (sdRange * sd),
    plotTitle = 'Normal Curve', xlab = 'x', ylab = 'density'
)
{
    sd <- sqrt( variance )

    # This calculation protects us from an altered x-range
    lowerSD <- (mean - from) / sd
    upperSD <- ( to  - mean) / sd

    normPlot <- ggplot() + aes( x = c( from, to ) ) +

    ####
    # Figuring out what the x scale should be
    scale_x_continuous( breaks = function(li)
    {
        br <- seq( ceiling(li[1]), li[2], ifelse( log(sd) >= 0, 1, abs(log(-log(sd))) ) )
        if ( length( br ) > 30 || log(sd) < 0 )
        {
            numOfBreaks <- ceiling( min( sdRange + 6, 10 ) )
                # cat( sprintf( 'numOfBreaks = %d\n', numOfBreaks ) )
            ltmean <- seq( li[1], mean, length.out = numOfBreaks%/%2 )
                # cat( sprintf( 'lb = %f, ltmean = %s\n', li[1], paste( ltmean, collapse = ', ' ) ) )
            gtmean <- seq( mean, li[2], length.out = numOfBreaks%/%2 )
                # cat( sprintf( 'ub = %f, gtmean = %s\n', li[2], paste( gtmean, collapse = ', ' ) ) )
            br <- c( ltmean[-length(ltmean)],
                     mean,
                     gtmean[-1] )
        }
        roundDigits <- ceiling(max(2,ifelse( sd == 1 || log(sd) >= 0, 0, abs(log(-log(sd)))*2 ) ) )
            # cat( sprintf( 'roundDigits = %d\n', roundDigits ) )

        return( round( br, roundDigits ) )
    } ) +

    ####
    # Title and axis titles
    labs( title = plotTitle, x = xlab, y = ylab ) +

    theme_bw() +
    theme(        plot.title = element_text( hjust = 0.5 ),
            panel.grid.major = element_line( size = 1.0 ),
           axis.ticks.length = unit( 5, 'pt' ),
                 plot.margin = unit( c( 2, 2, 2, 2 ), 'lines'))#,
                #aspect.ratio = 1/φ )

    ####
    # Shade every standard deviations from the mean
    lowerSDOut <- -floor( lowerSD )
    upperSDOut <-  floor( upperSD )
    sdLi <- mean + sd * lowerSDOut:upperSDOut
    sdL <- data.frame(
        x = sdLi,
        xend = sdLi,
        y = 0,
        yend = dnorm(sdLi,mean,sd)
    )

    shadeBounds <- rbind( c( from, sdL$x ), c( sdL$x,  to  ) )
    ## TODO don't assume symmetry! Numbers should be based on area's distance from the mean!
    sdLi <- c( 1:ceiling(ncol(shadeBounds)/2),
        (ceiling(ncol(shadeBounds)/2) -
            ifelse( ncol(shadeBounds)%%2 == 1, 1, 0 ) ):1 )
    shadeBounds <- rbind( shadeBounds, sdLi )
    rownames( shadeBounds ) <- NULL

    shades <- apply( shadeBounds, 2, function( x, i )
    {
        areax <- seq( x[1], x[2], length.out = 100 )
        data.frame( x = areax, ymin = 0, ymax = dnorm( areax, mean, sd ), id = x[3] )
    } )

    for ( shade in shades )
    {
        normPlot <- normPlot + geom_ribbon( data = shade, aes(
                x = x, ymin = ymin, ymax = ymax, alpha = factor(id) ),
                fill = 'lightgrey', color = NA,
            show.legend = FALSE )
    }
    #####
    normPlot <- normPlot + scale_alpha_manual( name = NULL,
            breaks = levels( factor( sdLi ) ),
            values = ( as.double( levels( factor( sdLi ) ) ) * 0.185 )^2 )
    #####
    normPlot <- normPlot + geom_segment( data = sdL, aes(
                x = x, y = y, xend = xend, yend = yend ),
            linetype = 'dotted', size = 0.3 )

    area <- list()

    ####
    # User-defined shading under the curve
    if ( !missing( lowerBound ) && !missing( upperBound ) )
    {
        for ( i in 1:length(lowerBound) )
        {
            xmin <- max( lowerBound[i], from )
            xmax <- min( upperBound[i],  to  )

            areax <- seq( xmin, xmax, length.out = 100 )
            area[[i]] <- data.frame(
                x = areax,
                ymin = 0,
                ymax = dnorm( areax, mean = mean, sd = sd ) )

            getLabel <- function( x )
            {
                sapply( x, function( j )
                {
                    if ( lowerBound[i] < xmin && j == xmin )
                        return( lowerBound[i] )
                    else if ( upperBound[i] > xmax && j == xmax)
                        return( upperBound[i] )
                    else
                        return( j )
                } )
            }

            # User-defined shading and dashed lines at cutoffs
            normPlot <- normPlot + geom_ribbon( data = area[[i]], aes(
                    x = x, ymin = ymin, ymax = ymax ),
                    fill = 'lightblue', alpha = 0.7 ) +
            geom_segment( data = area[[i]][c(1,nrow(area[[i]])),],
                aes( x = x, y = ymin, xend = x, yend = ymax ),
                linetype = '42', size = 0.4 )
        }
    }

    ####
    # The actual normal Curve
    normPlot <- normPlot + stat_function( fun = dnorm,
        args = list( mean = mean, sd = sd ) ) +

    ####
    # Dashed line for the average
    geom_segment( aes(
            x = mean, y = 0, xend = mean, yend = dnorm(mean,mean,sd) ),
        linetype = '82', size = 0.55 )

    ####
    # Label every standard deviations from the mean
    normPlot <- normPlot + geom_label( data = data.frame(
            x = sdL$x, y = sdL$yend ),
        aes( x = x, y = y, label = x ), label.padding = unit(0.15, "lines"),
        vjust = 0.0, size = 2.6, alpha = 0.8, hjust = 'outward',
        label.size = 0.25 )

    if ( !missing( lowerBound ) && !missing( upperBound ) )
    {
        for ( i in 1:length(lowerBound) )
        {
            # The labels on the User-defined bounds for shaded region
            normPlot <- normPlot + geom_label_repel(
                data = area[[i]][c(1,nrow(area[[i]])),],
                aes( x = x, y = ymax*7/8, label = getLabel( x ) ),
                fontface = 'italic', color = 'black', fill = 'lightgrey',
                show.legend = FALSE,
                size = 2.6, force = 0,
                label.size = 0.25,
                segment.size = 0.3,
                arrow = arrow(25, unit(0.01,'npc')),
                box.padding = unit( 0.02, 'npc' ),
                point.padding = unit( 0.6, 'lines' ),
                nudge_x = 0.1
            )

            # The label of probability under the curve
            pLabel.x <- meanAB( mean, sd, lowerBound[i], upperBound[i] )
            pLabel.y <- dnorm( pLabel.x, mean, sd )
            normPlot <- normPlot + geom_label_repel(
                data = data.frame(
                    x = pLabel.x,
                    y = pLabel.y / 2,
                    label = sprintf( "P = %.4f\n( %.3f )",
                        pnormAB( mean, sd, lowerBound[i], upperBound[i] ), pLabel.x ) ),
                aes( x = x, y = y, label = label ),
                fontface = 'italic', color = 'black', fill = 'lightgrey',
                show.legend = FALSE,
                size = 2.5, force = 2,
                label.size = 0.25,
                segment.size = 0.3,
                arrow = arrow(25, unit(0.01,'npc')),
                box.padding = unit( 0.02, 'npc' ),
                point.padding = unit( 0.6, 'lines' )
            )
        }
    }

    return( normPlot )
}

##
# Calculates the Expected value of a Truncated Normal Distribution
# # # #
expectedValueOfNormalXbetweenAandB <- meanAB <- function( mean, sd, a, b )
{
    µ <- mean
    σ <- sd
    µ + ( σ *
            ( ( exp( -(((a-µ)^2)/(2*(σ^2))) ) - exp( -(((b-µ)^2)/(2*(σ^2))) ) ) /
              ( sqrt(pi/2) *
                (  erf( (b-µ) / (sqrt(2)*σ) )
                 - erf( (a-µ) / (sqrt(2)*σ) )
                )
              )
            )
        )
}

##
# Calculates area under a normal curve from lowerBound to upperBound
# # # #
pnormAB <- function( mean, sd, lowerBound, upperBound )
{
    return(
        pnorm( upperBound, mean, sd ) -
        pnorm( lowerBound, mean, sd ) )
}
	##
	# Standard function for printing Normal Distribution in ggplot2
	# # # #
	plotNormCurve <- function( mean = 0, sd = 1, variance = sd ^ 2,
	lowerBound = NULL, upperBound = NULL,
	sdRange = 3.5,
	from = mean - (sdRange * sd),
	to = mean + (sdRange * sd),
	plotTitle = 'Normal Curve', xlab = 'x', ylab = 'density'
	)
	{
	sd <- sqrt( variance )

	# This calculation protects us from an altered x-range
	lowerSD <- (mean - from) / sd
	upperSD <- ( to - mean) / sd

	normPlot <- ggplot() + aes( x = c( from, to ) ) +

	####
	# Figuring out what the x scale should be
	scale_x_continuous( breaks = function(li)
	{
	br <- seq( ceiling(li[1]), li[2], ifelse( log(sd) >= 0, 1, abs(log(-log(sd))) ) )
	if ( length( br ) > 30 \|\| log(sd) < 0 )
	{
	numOfBreaks <- ceiling( min( sdRange + 6, 10 ) )
	# cat( sprintf( 'numOfBreaks = %d\n', numOfBreaks ) )
	ltmean <- seq( li[1], mean, length.out = numOfBreaks%/%2 )
	# cat( sprintf( 'lb = %f, ltmean = %s\n', li[1], paste( ltmean, collapse = ', ' ) ) )
	gtmean <- seq( mean, li[2], length.out = numOfBreaks%/%2 )
	# cat( sprintf( 'ub = %f, gtmean = %s\n', li[2], paste( gtmean, collapse = ', ' ) ) )
	br <- c( ltmean[-length(ltmean)],
	mean,
	gtmean[-1] )
	}
	roundDigits <- ceiling(max(2,ifelse( sd == 1 \|\| log(sd) >= 0, 0, abs(log(-log(sd)))*2 ) ) )
	# cat( sprintf( 'roundDigits = %d\n', roundDigits ) )

	return( round( br, roundDigits ) )
	} ) +

	####
	# Title and axis titles
	labs( title = plotTitle, x = xlab, y = ylab ) +

	theme_bw() +
	theme( plot.title = element_text( hjust = 0.5 ),
	panel.grid.major = element_line( size = 1.0 ),
	axis.ticks.length = unit( 5, 'pt' ),
	plot.margin = unit( c( 2, 2, 2, 2 ), 'lines'))#,
	#aspect.ratio = 1/φ )

	####
	# Shade every standard deviations from the mean
	lowerSDOut <- -floor( lowerSD )
	upperSDOut <- floor( upperSD )
	sdLi <- mean + sd * lowerSDOut:upperSDOut
	sdL <- data.frame(
	x = sdLi,
	xend = sdLi,
	y = 0,
	yend = dnorm(sdLi,mean,sd)
	)

	shadeBounds <- rbind( c( from, sdL$x ), c( sdL$x, to ) )
	## TODO don't assume symmetry! Numbers should be based on area's distance from the mean!
	sdLi <- c( 1:ceiling(ncol(shadeBounds)/2),
	(ceiling(ncol(shadeBounds)/2) -
	ifelse( ncol(shadeBounds)%%2 == 1, 1, 0 ) ):1 )
	shadeBounds <- rbind( shadeBounds, sdLi )
	rownames( shadeBounds ) <- NULL

	shades <- apply( shadeBounds, 2, function( x, i )
	{
	areax <- seq( x[1], x[2], length.out = 100 )
	data.frame( x = areax, ymin = 0, ymax = dnorm( areax, mean, sd ), id = x[3] )
	} )

	for ( shade in shades )
	{
	normPlot <- normPlot + geom_ribbon( data = shade, aes(
	x = x, ymin = ymin, ymax = ymax, alpha = factor(id) ),
	fill = 'lightgrey', color = NA,
	show.legend = FALSE )
	}
	#####
	normPlot <- normPlot + scale_alpha_manual( name = NULL,
	breaks = levels( factor( sdLi ) ),
	values = ( as.double( levels( factor( sdLi ) ) ) * 0.185 )^2 )
	#####
	normPlot <- normPlot + geom_segment( data = sdL, aes(
	x = x, y = y, xend = xend, yend = yend ),
	linetype = 'dotted', size = 0.3 )

	area <- list()

	####
	# User-defined shading under the curve
	if ( !missing( lowerBound ) && !missing( upperBound ) )
	{
	for ( i in 1:length(lowerBound) )
	{
	xmin <- max( lowerBound[i], from )
	xmax <- min( upperBound[i], to )

	areax <- seq( xmin, xmax, length.out = 100 )
	area[[i]] <- data.frame(
	x = areax,
	ymin = 0,
	ymax = dnorm( areax, mean = mean, sd = sd ) )

	getLabel <- function( x )
	{
	sapply( x, function( j )
	{
	if ( lowerBound[i] < xmin && j == xmin )
	return( lowerBound[i] )
	else if ( upperBound[i] > xmax && j == xmax)
	return( upperBound[i] )
	else
	return( j )
	} )
	}

	# User-defined shading and dashed lines at cutoffs
	normPlot <- normPlot + geom_ribbon( data = area[[i]], aes(
	x = x, ymin = ymin, ymax = ymax ),
	fill = 'lightblue', alpha = 0.7 ) +
	geom_segment( data = area[[i]][c(1,nrow(area[[i]])),],
	aes( x = x, y = ymin, xend = x, yend = ymax ),
	linetype = '42', size = 0.4 )
	}
	}

	####
	# The actual normal Curve
	normPlot <- normPlot + stat_function( fun = dnorm,
	args = list( mean = mean, sd = sd ) ) +

	####
	# Dashed line for the average
	geom_segment( aes(
	x = mean, y = 0, xend = mean, yend = dnorm(mean,mean,sd) ),
	linetype = '82', size = 0.55 )

	####
	# Label every standard deviations from the mean
	normPlot <- normPlot + geom_label( data = data.frame(
	x = sdL$x, y = sdL$yend ),
	aes( x = x, y = y, label = x ), label.padding = unit(0.15, "lines"),
	vjust = 0.0, size = 2.6, alpha = 0.8, hjust = 'outward',
	label.size = 0.25 )

	if ( !missing( lowerBound ) && !missing( upperBound ) )
	{
	for ( i in 1:length(lowerBound) )
	{
	# The labels on the User-defined bounds for shaded region
	normPlot <- normPlot + geom_label_repel(
	data = area[[i]][c(1,nrow(area[[i]])),],
	aes( x = x, y = ymax*7/8, label = getLabel( x ) ),
	fontface = 'italic', color = 'black', fill = 'lightgrey',
	show.legend = FALSE,
	size = 2.6, force = 0,
	label.size = 0.25,
	segment.size = 0.3,
	arrow = arrow(25, unit(0.01,'npc')),
	box.padding = unit( 0.02, 'npc' ),
	point.padding = unit( 0.6, 'lines' ),
	nudge_x = 0.1
	)

	# The label of probability under the curve
	pLabel.x <- meanAB( mean, sd, lowerBound[i], upperBound[i] )
	pLabel.y <- dnorm( pLabel.x, mean, sd )
	normPlot <- normPlot + geom_label_repel(
	data = data.frame(
	x = pLabel.x,
	y = pLabel.y / 2,
	label = sprintf( "P = %.4f\n( %.3f )",
	pnormAB( mean, sd, lowerBound[i], upperBound[i] ), pLabel.x ) ),
	aes( x = x, y = y, label = label ),
	fontface = 'italic', color = 'black', fill = 'lightgrey',
	show.legend = FALSE,
	size = 2.5, force = 2,
	label.size = 0.25,
	segment.size = 0.3,
	arrow = arrow(25, unit(0.01,'npc')),
	box.padding = unit( 0.02, 'npc' ),
	point.padding = unit( 0.6, 'lines' )
	)
	}
	}

	return( normPlot )
	}

	##
	# Calculates the Expected value of a Truncated Normal Distribution
	# # # #
	expectedValueOfNormalXbetweenAandB <- meanAB <- function( mean, sd, a, b )
	{
	µ <- mean
	σ <- sd
	µ + ( σ *
	( ( exp( -(((a-µ)^2)/(2(σ^2))) ) - exp( -(((b-µ)^2)/(2(σ^2))) ) ) /
	( sqrt(pi/2) *
	( erf( (b-µ) / (sqrt(2)*σ) )
	- erf( (a-µ) / (sqrt(2)*σ) )
	)
	)
	)
	)
	}

	##
	# Calculates area under a normal curve from lowerBound to upperBound
	# # # #
	pnormAB <- function( mean, sd, lowerBound, upperBound )
	{
	return(
	pnorm( upperBound, mean, sd ) -
	pnorm( lowerBound, mean, sd ) )
	}