Skip to content

Instantly share code, notes, and snippets.

@markrobinsonuzh
Last active September 21, 2015 18:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markrobinsonuzh/da426606d78c99f09eba to your computer and use it in GitHub Desktop.
Save markrobinsonuzh/da426606d78c99f09eba to your computer and use it in GitHub Desktop.
# Initial problem posed at:
# http://stats.stackexchange.com/questions/173450/an-impossible-estimation-problem?stw=2
set.seed(167)
x = rnbinom(100, size=3.2, prob=.8);
#x = rnbinom(100, size=3.2, mu=.8);
# note: mu = size*(1-prob)/prob ..
# in this case, mu=prob (3.2*.2/.8 = .8)
n <- 100
ns <- 1000
# randomly create another 999 samples with this setting
z <- matrix(nrow=ns,ncol=n)
for(i in 2:ns)
z[i,] <- rnbinom(n, size=3.2, mu=.8)
z[1,] <- x
library(edgeR)
library(matrixStats)
# estimate with edgeR i.e., adjusted profile likelihood w/ no moderation
d <- DGEList(z)
d$samples$lib.size <- 1
d <- estimateCommonDisp(d)
d <- estimateTagwiseDisp(d,prior.df=0)
# plot the estimates with the truth (blue), Guillaume's sample (red)
plot( 2^f$coefficients, 1/d$tagwise.dispersion, log="y", ylab="size", xlab="mu")
abline(v=.8, col="blue", lwd=3)
abline(h=3.2, col="blue", lwd=3)
points( 2^f$coefficients[1], 1/d$tagwise.dispersion[1], pch=19, cex=2, col="red")
# dispersion is basically zero (or size is large) whenever
# variance is less than mean
#> table( var_lt_mean=rowVars(z) < rowMeans(z),
# size_gt_199=1/d$tagwise.dispersion > 199)
# size_gt_199
#var_lt_mean FALSE TRUE
# FALSE 871 4
# TRUE 0 125
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment