shka/NBGLM-LBC.md

## library_bias_correction.R
library(MASS)
library(parallel)

library_bias_correction.0 <- function(reads, depths.log, libraries) {
    fit <- try(glm.nb(y ~ x + l, link=log, data=data.frame(y = reads, x = depths.log, l = libraries)), silent=T)
    if (inherits(fit, 'try-error')) {
        reads
    } else {
        tmp <- summary(fit)
        if (min(tmp$coefficients[, 'Pr(>|z|)']) >= 0.05) {
            reads
        } else {
            ls <- c(0, fit$coefficients[3:(nlevels(libraries)+1)])
            l.median <- median(ls)
            tmp <- exp(log(reads) - ls[libraries] + l.median)
            tmp[which(reads == 0)] <- 0
            tmp
        }
    }
}

library_bias_correction <- function(reads, depths, libraries, mc.cores=getOption('mc.cores', 2)) {
    nreads <- t(simplify2array(mclapply(as.data.frame(t(reads)), library_bias_correction.0, depths.log=log(depths), libraries=libraries, mc.cores=mc.cores)))
    colnames(nreads) <- colnames(reads)
    rownames(nreads) <- rownames(reads)
    floor(nreads)
}

## library_bias_correction.R
##
## Copyright (c) 2017 Shintaro Katayama
## Released under the MIT license
## http://opensource.org/licenses/mit-license.php

## NBGLM-LBC.md

      
    Raw
  

              NBGLM-LBC.md
            
          
    NBGLM-LBC

Library bias correction for highly multiplexed RNAseq based on a negative binomial generalized linear model.

Primary: https://shka.github.io/NBGLM-LBC
Mirror: https://shka.bitbucket.io/NBGLM-LBC

Requirements


R >= 3.0

NOTE: Because this package uses mclapply function in parallel package, it does NOT work in Windows native environment, but should work in Windows Subsystem for Linux.


Usage

source('https://shka.github.io/NBGLM-LBC.R')
## or https://shka.bitbucket.io/NBGLM-LBC.R through the mirror site

## preparation of a raw count matrix ...
## preparation of depths ...
## preparation of libraries ...

reads.corrected <- library_bias_correction(reads, depths, libraries, mc.cores=3)

## normalization of the corrected counts ...
## downstream analysis using the normalized & corrected counts ...
Function library_bias_correction


Parameters

reads: a matrix of raw counts before normalization/correction; samples by colulmns, and features (ex. genes) by rows.
depths: a vector of samples' depths. The order must be same with the columns of reads. The recommendation of depth is sum of mapped reads and unmapped reads, when the mapping rates are equivalent between the samples.
libraries: a factor of samples' libraries. The order must be same with the columns of reads. The number of samples must be greater than 0 for all samples.
mc.cores: (optional) number of cores for the bias correction.


Output

A matrix of counts after the library bias correction. While the corrected matrix can be applied to some methods, which use raw read counts (ex. differential expression tests by DESeq, edgeR, SAMseq/SAMstrt etc), normalization of the corrected read counts is required before many down-stream analysis (ex. PCA, WGCNA, t-SNE etc).


History


2019-05-29 Wed: Update the document using a simple URL and a mirror site
2017-09-29 Fri: Add a document.
2017-04-03 Mon: Remove further normalization steps
2017-03-19 Sun: Initial

Copyright (c) 2017 Shintaro Katayama
Released under the MIT license
http://opensource.org/licenses/mit-license.php
	library(MASS)
	library(parallel)

	library_bias_correction.0 <- function(reads, depths.log, libraries) {
	fit <- try(glm.nb(y ~ x + l, link=log, data=data.frame(y = reads, x = depths.log, l = libraries)), silent=T)
	if (inherits(fit, 'try-error')) {
	reads
	} else {
	tmp <- summary(fit)
	if (min(tmp$coefficients[, 'Pr(>\|z\|)']) >= 0.05) {
	reads
	} else {
	ls <- c(0, fit$coefficients[3:(nlevels(libraries)+1)])
	l.median <- median(ls)
	tmp <- exp(log(reads) - ls[libraries] + l.median)
	tmp[which(reads == 0)] <- 0
	tmp
	}
	}
	}

	library_bias_correction <- function(reads, depths, libraries, mc.cores=getOption('mc.cores', 2)) {
	nreads <- t(simplify2array(mclapply(as.data.frame(t(reads)), library_bias_correction.0, depths.log=log(depths), libraries=libraries, mc.cores=mc.cores)))
	colnames(nreads) <- colnames(reads)
	rownames(nreads) <- rownames(reads)
	floor(nreads)
	}

	## library_bias_correction.R
	##
	## Copyright (c) 2017 Shintaro Katayama
	## Released under the MIT license
	## http://opensource.org/licenses/mit-license.php