Skip to content

Instantly share code, notes, and snippets.

@npjc
Created August 31, 2014 06:18
Show Gist options
  • Save npjc/2942000b736827557781 to your computer and use it in GitHub Desktop.
Save npjc/2942000b736827557781 to your computer and use it in GitHub Desktop.
title author date output
2 - normalizing counts
Nicolas Coutin
August 29, 2014
html_document

Generate windowed reference files.

Run bash scripts

# should exist
Sys.which('bash')
##        bash 
## "/bin/bash"
Sys.which('sh')
##        sh 
## "/bin/sh"

Does bash work?

echo hello world
echo 'a b c' | sed 's/ /\|/g'
# number of lines
awk 'END{print NR;}' 027-engine-bash.Rmd
## hello world
## a|b|c

Count of windowed beds

cd ..
ls bed/*.bed | parallel "bedmap --count --fraction-map 0.5001 ref/sacCer3_w500.bed {} > {.}_w500_tmp"
ls bed/*.bed | parallel "bedmap --count ref/sacCer3_w5.bed {} > {.}_w5_tmp"
paste bed/*w500_tmp > counts_w500.tsv
paste bed/*w5_tmp > counts_w5.tsv
rm bed/*_tmp

Calculate Normalization

library(data.table)
library(dplyr)
library(edgeR)
count_tbl <- fread("~/Desktop/seqdata/choiJ2014A/counts_w500.tsv")
filt_count_tbl <- count_tbl %>%
    filter(aveLogCPM(count_tbl) >= -1 & rowSums(count_tbl) >= 60)
window_dge <- DGEList(count_tbl,group=factor(rep(1:6,each=2)))
window_dge <- calcNormFactors(window_dge,doWeighting = FALSE)
window_dge$samples$norm.factors
count_tbl2 <- fread("~/Desktop/seqdata/choiJ2014A/counts_w5.tsv")
window_dge2 <- DGEList(count_tbl2,group=factor(rep(1:6,each=2)))
window_dge2$samples$norm.factors <- window_dge$samples$norm.factors
out <- cpm(window_dge2,log=TRUE,prior.count=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment