Last active
August 29, 2015 14:00
Find intervals where the mean RNA-seq coverage among 3 (for example) samples is >= a minimum threshold.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# compute a BEDGRAPH RNA-seq coverage across the entire genome for each sample: | |
bedtools genomecov -bga -split -ibam sample1.sorted.bam > 1.bg | |
bedtools genomecov -bga -split -ibam sample1.sorted.bam > 2.bg | |
bedtools genomecov -bga -split -ibam sample1.sorted.bam > 3.bg | |
# compute the mean coverage at each interval. | |
bedtools unionbedg -i 1.bg 2.bg 3.bg \ | |
| awk '{sum=0; for (col=4; col<=NF; col++) sum += $col; print $0"\t"sum/(NF-4+1); }' | |
chr1 900 1000 0 60 0 20 | |
chr1 1000 1500 10 60 0 23.3333 | |
chr1 1500 1600 0 60 0 20 | |
chr1 1700 1980 0 50 0 16.6667 | |
chr1 1980 2000 0 50 80 43.3333 | |
chr1 2000 2050 20 50 80 50 | |
chr1 2050 2070 20 0 80 33.3333 | |
chr1 2070 2090 20 0 0 6.66667 | |
chr1 2090 2100 20 0 20 13.3333 | |
# filter such that intervals must exceed a minimum mean coverage (e.g., 10) among the samples: | |
bedtools unionbedg -i 1.bg 2.bg 3.bg \ | |
| awk '{sum=0; for (col=4; col<=NF; col++) sum += $col; print $0"\t"sum/(NF-4+1); }' \ | |
| awk '$7 > 10' | |
chr1 900 1000 0 60 0 20 | |
chr1 1000 1500 10 60 0 23.3333 | |
chr1 1500 1600 0 60 0 20 | |
chr1 1700 1980 0 50 0 16.6667 | |
chr1 1980 2000 0 50 80 43.3333 | |
chr1 2000 2050 20 50 80 50 | |
chr1 2050 2070 20 0 80 33.3333 | |
chr1 2090 2100 20 0 20 13.3333 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment