title | author | date | output |
---|---|---|---|
2 - normalizing counts |
Nicolas Coutin |
August 29, 2014 |
html_document |
# should exist
Sys.which('bash')
## bash
## "/bin/bash"
Sys.which('sh')
## sh
## "/bin/sh"
Does bash
work?
echo hello world
echo 'a b c' | sed 's/ /\|/g'
# number of lines
awk 'END{print NR;}' 027-engine-bash.Rmd
## hello world
## a|b|c
cd ..
ls bed/*.bed | parallel "bedmap --count --fraction-map 0.5001 ref/sacCer3_w500.bed {} > {.}_w500_tmp"
ls bed/*.bed | parallel "bedmap --count ref/sacCer3_w5.bed {} > {.}_w5_tmp"
paste bed/*w500_tmp > counts_w500.tsv
paste bed/*w5_tmp > counts_w5.tsv
rm bed/*_tmp
library(data.table)
library(dplyr)
library(edgeR)
count_tbl <- fread("~/Desktop/seqdata/choiJ2014A/counts_w500.tsv")
filt_count_tbl <- count_tbl %>%
filter(aveLogCPM(count_tbl) >= -1 & rowSums(count_tbl) >= 60)
window_dge <- DGEList(count_tbl,group=factor(rep(1:6,each=2)))
window_dge <- calcNormFactors(window_dge,doWeighting = FALSE)
window_dge$samples$norm.factors
count_tbl2 <- fread("~/Desktop/seqdata/choiJ2014A/counts_w5.tsv")
window_dge2 <- DGEList(count_tbl2,group=factor(rep(1:6,each=2)))
window_dge2$samples$norm.factors <- window_dge$samples$norm.factors
out <- cpm(window_dge2,log=TRUE,prior.count=2)