arraytools/Vat2.R Secret

## Vat2.R
rowVars <- function(x, na.rm=FALSE, dims=1, unbiased=TRUE, SumSquares=FALSE,
                    twopass=FALSE) {
  if (SumSquares) return(rowSums(x^2, na.rm, dims))
  N <- rowSums(!is.na(x), FALSE, dims)
  Nm1 <- if (unbiased) N-1 else N
  if (twopass) {x <- if (dims==0) x - mean(x, na.rm=na.rm) else
                     sweep(x, 1:dims, rowMeans(x,na.rm,dims))}
  (rowSums(x^2, na.rm, dims) - rowSums(x, na.rm, dims)^2/N) / Nm1
}

Vat2 <- function(x,label,grp=c(0,1), rvm, a,b, warn = TRUE)  {
    # Two sample t test and v test
    # category is defined by 0,1,2,... with 0 the 2nd set of samples
    # Output:
    #    mn: grp[2] - grp[1] OR x_{label==1} - x_{label==0}
    #    mn1 or mntest: grp[2] OR x_{label==1} OR mean of the test samples
    #    mn2 or mnref: grp[1] OR x_{label==0} OR mean of the reference samples
    #    weight: weight=(n-k)/[(n-k)+2a], k is the number of classes
    #            k=2 in this case.
    #
    #  tmp <- Vat2(matrix(c(1:3, 11:13),1,6), c(0,0,0,1,1,1), rvm = FALSE)
    #  tmp$tp      # 0.0002552167
    #
    #  Method 2.
    #  t.test(1:3, 11:13, var.equal = T)$p.value     # 0.0002552167
    #
    #  Method 3.
    #  require(sva)
    #  pheno <- data.frame(group = c(0,0,0,1,1,1))
    #  mod = model.matrix(~as.factor(group), data = pheno)
    #  mod0 = model.matrix(~1, data = pheno)
    #  f.pvalue(matrix(c(1:3, 11:13),1,6), mod, mod0)   # 0.0002552167
    #
    #  Method 4.
    #  library(limma)
    #  design <- model.matrix(~ factor(c(1,1,1,2,2,2))) # number of samples x number of groups
    #  colnames(design) <- c("group1", "group2")
    #  fit <- lmFit(matrix(c(1:3, 11:13),1,6), design)
    #  unmod.t <- fit$coefficients/fit$stdev.unscaled/fit$sigma
    #  pval <- 2*pt(-abs(unmod.t), fit$df.residual)  # (intercept, group)
    #  pval[2]  #  [1] 0.0002552167
    #
    #  Method 5. seems to be the easiest
    #  library(genefilter)
    #  r1 = rowttests(matrix(c(1:3, 11:13),1,6), factor(c(0,0,0,1,1,1)))
    #  r1$p.value  # [1] 0.0002552167

    label <- as.integer(label)
    if (any(table(label) == 1)) {
      if (warn) warning("At least two observations are needed in each class for the T test")
      return(list(er=1))
    }
    dat1 <- x[,label==grp[2],drop = FALSE]
    dat2 <- x[,label==grp[1],drop = FALSE]

    vr1 <- rowVars(dat1,na.rm=TRUE)
    vr2 <- rowVars(dat2,na.rm=TRUE)
    n1 <- rowSums(!is.na(dat1))
    n2 <- rowSums(!is.na(dat2))
    mn1 <- rowMeans(dat1,na.rm=TRUE)
    mn2 <- rowMeans(dat2,na.rm=TRUE)
    mn <- mn1-mn2
    n <- n1+n2
    vr <- (n1-1)*vr1+(n2-1)*vr2
    vr <- vr/(n-2)
    newn <- 1/(1/n1+1/n2)
    t <- mn/sqrt(vr/newn)
    n[n<3] <- 3
    tp <- 2*(1-pt(abs(t),df=n-2))
    out <- list(sequid=1:length(n1), n1=n1, n2=n2, mn1=mn1, mn2=mn2,
                vr1=vr1, vr2=vr2, mn=mn, vr=vr, n=n, t=t, tp=tp, mntest=mn1, mnref=mn2, er=0)
    if (rvm) {
       if (missing(a) | missing(b)) {
          a <- getab(vr,n-2)
          b <- a[2]
          a <- a[1]
	   }
       v <- newn*(1+2*a/(n-2))/(vr+2/((n-2)*b))
       v <- mn*sqrt(v)
       vp <- 2*(1-pt(abs(v),df=n-2+2*a))
       wt <- (n-2)/((n-2)+2*a)
       out$v <- v
       out$vp <- vp
       out$weight <- wt
       out$a <- a
       out$b <- b
    }
    invisible(out)
}

## Vat2_ex.R
set.seed(1)
ex <- matrix(rnorm(1000*10), nr=1000)
grp <- c(rep(0,5), rep(1, 5))

# Method 1. Vat2()
system.time(tmp <- Vat2(ex, grp, rvm = FALSE))  # .002
p1 <- tmp$tp

# Method 2. t.test
p2 <- rep(NA, nrow(ex))
system.time(for(i in 1:nrow(ex)) p2[i] <- t.test(ex[i, 1:5], ex[i, 6:10], var.equal = TRUE)$p.value) # 0.218

# Method 3. sva
pheno <- data.frame(group = grp)
mod = model.matrix(~as.factor(group), data = pheno)
mod0 = model.matrix(~1, data = pheno)
system.time(p3 <- f.pvalue(ex, mod, mod0) )  # .001

# Method 4. limma
design <- model.matrix(~ factor(grp)) # number of samples x number of groups
colnames(design) <- c("intercept", "group")
system.time(fit <- lmFit(ex, design))   # .003
unmod.t <- fit$coefficients/fit$stdev.unscaled/fit$sigma
pval <- 2*pt(-abs(unmod.t), fit$df.residual)
p4 <- pval[, 2]

# Method 5. genefilter
system.time(r1 <- rowttests(ex, factor(grp)) )  # .002
p5 <- r1$p.value

all.equal(p1, p5)
all.equal(p2, p5)
all.equal(p3, p5)
all.equal(p4, p5)
	rowVars <- function(x, na.rm=FALSE, dims=1, unbiased=TRUE, SumSquares=FALSE,
	twopass=FALSE) {
	if (SumSquares) return(rowSums(x^2, na.rm, dims))
	N <- rowSums(!is.na(x), FALSE, dims)
	Nm1 <- if (unbiased) N-1 else N
	if (twopass) {x <- if (dims==0) x - mean(x, na.rm=na.rm) else
	sweep(x, 1:dims, rowMeans(x,na.rm,dims))}
	(rowSums(x^2, na.rm, dims) - rowSums(x, na.rm, dims)^2/N) / Nm1
	}

	Vat2 <- function(x,label,grp=c(0,1), rvm, a,b, warn = TRUE) {
	# Two sample t test and v test
	# category is defined by 0,1,2,... with 0 the 2nd set of samples
	# Output:
	# mn: grp[2] - grp[1] OR x_{label==1} - x_{label==0}
	# mn1 or mntest: grp[2] OR x_{label==1} OR mean of the test samples
	# mn2 or mnref: grp[1] OR x_{label==0} OR mean of the reference samples
	# weight: weight=(n-k)/[(n-k)+2a], k is the number of classes
	# k=2 in this case.
	#
	# tmp <- Vat2(matrix(c(1:3, 11:13),1,6), c(0,0,0,1,1,1), rvm = FALSE)
	# tmp$tp # 0.0002552167
	#
	# Method 2.
	# t.test(1:3, 11:13, var.equal = T)$p.value # 0.0002552167
	#
	# Method 3.
	# require(sva)
	# pheno <- data.frame(group = c(0,0,0,1,1,1))
	# mod = model.matrix(~as.factor(group), data = pheno)
	# mod0 = model.matrix(~1, data = pheno)
	# f.pvalue(matrix(c(1:3, 11:13),1,6), mod, mod0) # 0.0002552167
	#
	# Method 4.
	# library(limma)
	# design <- model.matrix(~ factor(c(1,1,1,2,2,2))) # number of samples x number of groups
	# colnames(design) <- c("group1", "group2")
	# fit <- lmFit(matrix(c(1:3, 11:13),1,6), design)
	# unmod.t <- fit$coefficients/fit$stdev.unscaled/fit$sigma
	# pval <- 2*pt(-abs(unmod.t), fit$df.residual) # (intercept, group)
	# pval[2] # [1] 0.0002552167
	#
	# Method 5. seems to be the easiest
	# library(genefilter)
	# r1 = rowttests(matrix(c(1:3, 11:13),1,6), factor(c(0,0,0,1,1,1)))
	# r1$p.value # [1] 0.0002552167

	label <- as.integer(label)
	if (any(table(label) == 1)) {
	if (warn) warning("At least two observations are needed in each class for the T test")
	return(list(er=1))
	}
	dat1 <- x[,label==grp[2],drop = FALSE]
	dat2 <- x[,label==grp[1],drop = FALSE]

	vr1 <- rowVars(dat1,na.rm=TRUE)
	vr2 <- rowVars(dat2,na.rm=TRUE)
	n1 <- rowSums(!is.na(dat1))
	n2 <- rowSums(!is.na(dat2))
	mn1 <- rowMeans(dat1,na.rm=TRUE)
	mn2 <- rowMeans(dat2,na.rm=TRUE)
	mn <- mn1-mn2
	n <- n1+n2
	vr <- (n1-1)vr1+(n2-1)vr2
	vr <- vr/(n-2)
	newn <- 1/(1/n1+1/n2)
	t <- mn/sqrt(vr/newn)
	n[n<3] <- 3
	tp <- 2*(1-pt(abs(t),df=n-2))
	out <- list(sequid=1:length(n1), n1=n1, n2=n2, mn1=mn1, mn2=mn2,
	vr1=vr1, vr2=vr2, mn=mn, vr=vr, n=n, t=t, tp=tp, mntest=mn1, mnref=mn2, er=0)
	if (rvm) {
	if (missing(a) \| missing(b)) {
	a <- getab(vr,n-2)
	b <- a[2]
	a <- a[1]
	}
	v <- newn(1+2a/(n-2))/(vr+2/((n-2)*b))
	v <- mn*sqrt(v)
	vp <- 2(1-pt(abs(v),df=n-2+2a))
	wt <- (n-2)/((n-2)+2*a)
	out$v <- v
	out$vp <- vp
	out$weight <- wt
	out$a <- a
	out$b <- b
	}
	invisible(out)
	}
	set.seed(1)
	ex <- matrix(rnorm(1000*10), nr=1000)
	grp <- c(rep(0,5), rep(1, 5))

	# Method 1. Vat2()
	system.time(tmp <- Vat2(ex, grp, rvm = FALSE)) # .002
	p1 <- tmp$tp

	# Method 2. t.test
	p2 <- rep(NA, nrow(ex))
	system.time(for(i in 1:nrow(ex)) p2[i] <- t.test(ex[i, 1:5], ex[i, 6:10], var.equal = TRUE)$p.value) # 0.218

	# Method 3. sva
	pheno <- data.frame(group = grp)
	mod = model.matrix(~as.factor(group), data = pheno)
	mod0 = model.matrix(~1, data = pheno)
	system.time(p3 <- f.pvalue(ex, mod, mod0) ) # .001

	# Method 4. limma
	design <- model.matrix(~ factor(grp)) # number of samples x number of groups
	colnames(design) <- c("intercept", "group")
	system.time(fit <- lmFit(ex, design)) # .003
	unmod.t <- fit$coefficients/fit$stdev.unscaled/fit$sigma
	pval <- 2*pt(-abs(unmod.t), fit$df.residual)
	p4 <- pval[, 2]

	# Method 5. genefilter
	system.time(r1 <- rowttests(ex, factor(grp)) ) # .002
	p5 <- r1$p.value

	all.equal(p1, p5)
	all.equal(p2, p5)
	all.equal(p3, p5)
	all.equal(p4, p5)