jkeirstead/epsrc-fellowships-funnel.r

## epsrc-fellowships-funnel.r
# Analysis of EPSRC fellowship success rates
# 18 November 2013
# James Keirstead

##' Calculates the bounds for a funnel plot
##'
##' Calculates the upper and lower confidence interval limits for use
##' in a funnel plot.  Assumes a binomial discrete distribution with a
##' probability of success theta.
##'
##' @param n the maximum number of trials in the data set
##' @param theta the average success rate in the data set
##' @param alpha a numeric giving the confidence interval, default = 0.95
##' @param method a numeric giving the method to use ("exact", "spiegelhalter")
##' @return a data frame giving the upper and lower confidence
##' intervals on success rate
##' @source Spiegelhalter, D. J. (2005). Funnel plots for comparing
##' institutional performance. Statistics in Medicine, 24(8),
##' 1185--202. doi:10.1002/sim.1970
##'
calculate_funnel_limits <- function(n, theta, alpha=0.95, method="exact") {

    ## Convert alpha into p
    p <- (1 - alpha)/2

    if (method=="exact") {
        ## Calculate the exact binomial counts
        ## Because this is a discrete distribution
        ## these limits will appear very jumpy
        upper <- qbinom(1 - p, 1:n, theta)/1:n
        lower <- qbinom(p, 1:n, theta)/1:n
    } else if (method=="spiegelhalter") {

        ## Calculate the confidence limits
        ## using Spiegelhalter's interpolation
        u1 <- qbinom(1-p, 1:n, theta)
        p.u1 <- pbinom(u1, 1:n, theta)
        u2 <- qbinom(1-p, 1:n, theta)-1
        p.u2 <- pbinom(u2, 1:n, theta)
        a <- ((1-p) - p.u2)/(p.u1-p.u2)
        upper <- (u2 + a*(u1-u2))/1:n

        l1 <- qbinom(p, 1:n, theta) - 1
        l1 <- replace(l1, l1<0, 0)
        p.l1 <- pbinom(l1, 1:n, theta)
        l2 <- qbinom(p, 1:n, theta)
        p.l2 <- pbinom(l2, 1:n, theta)
        a <- (p - p.l1)/(p.l2-p.l1)
        lower <- (l1 + a*(l2-l1))/1:n
    }

    return(data.frame(x=1:n, up=upper, lo=lower))
}

## Raw data on success rates in EPSRC Fellowships
## http://www.epsrc.ac.uk/skills/fellows/Pages/fellowships.aspx
data <- data.frame(rate=c(0.3, 0.17, 0.16, 0.1, 0.37, 0.33, 0.31, 0, 0),
                   n=c(33, 161, 47, 43, 32, 41, 104, 5, 5),
                   label=c("Engineering", "Physical Sciences", "Energy", "Healthcare",
                       "Manufacturing the Future", "ICT", "Mathematics",
                       "Digital Economy", "Living With Environmental Change"),
                   halign=c(0, 1, 0, 0, 0, 0, 0, 0, 0),
                   offset=c(1.5, -1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5),
                   valign=c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 1))


## Calculate the expected response rate as a weighted average
n <- max(data$n)
theta <- with(data,sum(rate*n)/sum(n))

## Calculate the funnel plot limits and transform
require(reshape2)
tmp <- calculate_funnel_limits(n, theta, method="spiegelhalter")
tmp <- melt(tmp, id="x")

## Plot the results
require(ggplot2)
require(scales)
gg <- ggplot(data, aes(x=n, y=rate)) +
    geom_line(data=tmp, aes(x=x, y=value, group=variable), colour="#AAAAAA", linetype="dashed") +
    geom_point() +
    geom_text(aes(x=n+offset, label=label, hjust=halign, vjust=valign), size=3) +
    geom_hline(data=NULL, aes(yintercept=theta), color="red") +
    geom_text(data=NULL, aes(x=max(data$n), hjust=1, vjust=0, y=theta, label="Average"), size=4) +
    labs(x="Number of applications", y="Success rate") +
    scale_y_continuous(label=percent) +
    theme_bw()

ggsave("epsrc-funnel.png", gg, width=8, height=6, dpi=150)
	# Analysis of EPSRC fellowship success rates
	# 18 November 2013
	# James Keirstead

	##' Calculates the bounds for a funnel plot
	##'
	##' Calculates the upper and lower confidence interval limits for use
	##' in a funnel plot. Assumes a binomial discrete distribution with a
	##' probability of success theta.
	##'
	##' @param n the maximum number of trials in the data set
	##' @param theta the average success rate in the data set
	##' @param alpha a numeric giving the confidence interval, default = 0.95
	##' @param method a numeric giving the method to use ("exact", "spiegelhalter")
	##' @return a data frame giving the upper and lower confidence
	##' intervals on success rate
	##' @source Spiegelhalter, D. J. (2005). Funnel plots for comparing
	##' institutional performance. Statistics in Medicine, 24(8),
	##' 1185--202. doi:10.1002/sim.1970
	##'
	calculate_funnel_limits <- function(n, theta, alpha=0.95, method="exact") {

	## Convert alpha into p
	p <- (1 - alpha)/2

	if (method=="exact") {
	## Calculate the exact binomial counts
	## Because this is a discrete distribution
	## these limits will appear very jumpy
	upper <- qbinom(1 - p, 1:n, theta)/1:n
	lower <- qbinom(p, 1:n, theta)/1:n
	} else if (method=="spiegelhalter") {

	## Calculate the confidence limits
	## using Spiegelhalter's interpolation
	u1 <- qbinom(1-p, 1:n, theta)
	p.u1 <- pbinom(u1, 1:n, theta)
	u2 <- qbinom(1-p, 1:n, theta)-1
	p.u2 <- pbinom(u2, 1:n, theta)
	a <- ((1-p) - p.u2)/(p.u1-p.u2)
	upper <- (u2 + a*(u1-u2))/1:n

	l1 <- qbinom(p, 1:n, theta) - 1
	l1 <- replace(l1, l1<0, 0)
	p.l1 <- pbinom(l1, 1:n, theta)
	l2 <- qbinom(p, 1:n, theta)
	p.l2 <- pbinom(l2, 1:n, theta)
	a <- (p - p.l1)/(p.l2-p.l1)
	lower <- (l1 + a*(l2-l1))/1:n
	}

	return(data.frame(x=1:n, up=upper, lo=lower))
	}

	## Raw data on success rates in EPSRC Fellowships
	## http://www.epsrc.ac.uk/skills/fellows/Pages/fellowships.aspx
	data <- data.frame(rate=c(0.3, 0.17, 0.16, 0.1, 0.37, 0.33, 0.31, 0, 0),
	n=c(33, 161, 47, 43, 32, 41, 104, 5, 5),
	label=c("Engineering", "Physical Sciences", "Energy", "Healthcare",
	"Manufacturing the Future", "ICT", "Mathematics",
	"Digital Economy", "Living With Environmental Change"),
	halign=c(0, 1, 0, 0, 0, 0, 0, 0, 0),
	offset=c(1.5, -1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5),
	valign=c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 1))


	## Calculate the expected response rate as a weighted average
	n <- max(data$n)
	theta <- with(data,sum(rate*n)/sum(n))

	## Calculate the funnel plot limits and transform
	require(reshape2)
	tmp <- calculate_funnel_limits(n, theta, method="spiegelhalter")
	tmp <- melt(tmp, id="x")

	## Plot the results
	require(ggplot2)
	require(scales)
	gg <- ggplot(data, aes(x=n, y=rate)) +
	geom_line(data=tmp, aes(x=x, y=value, group=variable), colour="#AAAAAA", linetype="dashed") +
	geom_point() +
	geom_text(aes(x=n+offset, label=label, hjust=halign, vjust=valign), size=3) +
	geom_hline(data=NULL, aes(yintercept=theta), color="red") +
	geom_text(data=NULL, aes(x=max(data$n), hjust=1, vjust=0, y=theta, label="Average"), size=4) +
	labs(x="Number of applications", y="Success rate") +
	scale_y_continuous(label=percent) +
	theme_bw()

	ggsave("epsrc-funnel.png", gg, width=8, height=6, dpi=150)