DarioS/isoDiff.R

## isoDiff.R
isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count = 0)
{
    unknown.what <- setdiff(what, c("exon", "junction", "intronic"))
    if(length(unknown.what) > 0)
        stop(paste(unknown.what, collapse = ", "), " are invalid values for 'what'.")

    counts.df <- as.data.frame(counts)
    features.keep <- counts.df[, "type"] %in% what
    counts.df <- counts.df[features.keep, ]
    counts.cols <- metadata(counts)[["counts.cols"]] + 5 # Transform relative to all data frame columns.
    n.counts.cols <- length(counts.cols)
    sample.names <- colnames(counts.df)[counts.cols]

    feat.indices <- unlist(split(1:nrow(counts.df), counts.df$gene))
    results.list <- by(counts.df, counts.df[, "gene"], function(x)
                   {
                       feat.counts <- x[, counts.cols]
                       counts.OK <- rowSums(feat.counts) > min.count
                       chisq.result <- if(any(colSums(feat.counts[counts.OK, ]) == 0) || sum(counts.OK) < 2) NULL else chisq.test(x[counts.OK, counts.cols])
                       feat.counts.full <- do.call(cbind, lapply(1:n.counts.cols, function(x)
                                                          {
                                                              expect <- s.diff <- rep(NA, nrow(feat.counts))
                                                              obs <- feat.counts[, x]
                                                              if(!is.null(chisq.result))
                                                              {
                                                                expect[counts.OK] <- chisq.result[["expected"]][, x]
                                                                s.diff <- (obs - expect)^2 / expect
                                                              }
                                                              sample.counts.full <- cbind(obs, expect, s.diff)
                                                              colnames(sample.counts.full) <- paste(sample.names[x],
                                                                                                    c("Observed", "Expected", "Z²"))
                                                              sample.counts.full
                                                          }))
                       p.value <- ifelse(!is.null(chisq.result), chisq.result$p.value, 1)
                       list(p.value = p.value, counts = feat.counts.full)
                   })
    ids <- names(results.list)

    if("gene.symbol" %in% colnames(counts.df)) # Gene names not always given to rnaCounts by the user.
        gene.stats <- data.frame(gene = ids, gene.symbol = counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"],
                                 p.value = sapply(results.list, "[[", 1))
    else
        gene.stats <- data.frame(gene = ids, p.value = sapply(results.list, "[[", 1))
    rownames(gene.stats) <- NULL

    feature.stats <- counts[features.keep]
    feature.anno.cols <- colnames(counts.df) %in% c("type", "gene", "gene.symbol")
    values(feature.stats) <- DataFrame(counts.df[, feature.anno.cols],
                                       do.call(rbind, sapply(results.list, "[[", 2))[order(feat.indices), ])

    list(genes = gene.stats, features = feature.stats)
}
	isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count = 0)
	{
	unknown.what <- setdiff(what, c("exon", "junction", "intronic"))
	if(length(unknown.what) > 0)
	stop(paste(unknown.what, collapse = ", "), " are invalid values for 'what'.")

	counts.df <- as.data.frame(counts)
	features.keep <- counts.df[, "type"] %in% what
	counts.df <- counts.df[features.keep, ]
	counts.cols <- metadata(counts)[["counts.cols"]] + 5 # Transform relative to all data frame columns.
	n.counts.cols <- length(counts.cols)
	sample.names <- colnames(counts.df)[counts.cols]

	feat.indices <- unlist(split(1:nrow(counts.df), counts.df$gene))
	results.list <- by(counts.df, counts.df[, "gene"], function(x)
	{
	feat.counts <- x[, counts.cols]
	counts.OK <- rowSums(feat.counts) > min.count
	chisq.result <- if(any(colSums(feat.counts[counts.OK, ]) == 0) \|\| sum(counts.OK) < 2) NULL else chisq.test(x[counts.OK, counts.cols])
	feat.counts.full <- do.call(cbind, lapply(1:n.counts.cols, function(x)
	{
	expect <- s.diff <- rep(NA, nrow(feat.counts))
	obs <- feat.counts[, x]
	if(!is.null(chisq.result))
	{
	expect[counts.OK] <- chisq.result[["expected"]][, x]
	s.diff <- (obs - expect)^2 / expect
	}
	sample.counts.full <- cbind(obs, expect, s.diff)
	colnames(sample.counts.full) <- paste(sample.names[x],
	c("Observed", "Expected", "Z²"))
	sample.counts.full
	}))
	p.value <- ifelse(!is.null(chisq.result), chisq.result$p.value, 1)
	list(p.value = p.value, counts = feat.counts.full)
	})
	ids <- names(results.list)

	if("gene.symbol" %in% colnames(counts.df)) # Gene names not always given to rnaCounts by the user.
	gene.stats <- data.frame(gene = ids, gene.symbol = counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"],
	p.value = sapply(results.list, "[[", 1))
	else
	gene.stats <- data.frame(gene = ids, p.value = sapply(results.list, "[[", 1))
	rownames(gene.stats) <- NULL

	feature.stats <- counts[features.keep]
	feature.anno.cols <- colnames(counts.df) %in% c("type", "gene", "gene.symbol")
	values(feature.stats) <- DataFrame(counts.df[, feature.anno.cols],
	do.call(rbind, sapply(results.list, "[[", 2))[order(feat.indices), ])

	list(genes = gene.stats, features = feature.stats)
	}