Skip to content

Instantly share code, notes, and snippets.

@sjackman
Created February 27, 2013 19:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sjackman/5051160 to your computer and use it in GitHub Desktop.
Save sjackman/5051160 to your computer and use it in GitHub Desktop.
STAT 540 Seminar 7
# STAT 540 Seminar 7
# Shaun Jackman
library(biomaRt)
library(easyRNASeq)
library(Rsamtools)
library(ShortRead)
library(BSgenome.Dmelanogaster.UCSC.dm3)
# Reading BAM Files
bamDat <- readAligned(
"../data/drosophilaRnaSeq/drosophilaMelanogasterSubset.bam",
type = "BAM")
str(bamDat)
indexFile <- indexBam("../data/drosophilaRnaSeq/drosophilaMelanogasterSubset.bam")
# Filtering BAM Files
nFilt <- nFilter(2)
chrFilt <- chromosomeFilter(regex = "chr")
filt <- compose(nFilt, chrFilt)
bamDatFiltered <- bamDat[filt(bamDat)]
# Examining BAM Data
str(bamDatFiltered)
levels(chromosome(bamDatFiltered))
id(bamDatFiltered)[1:10]
sread(bamDatFiltered)[1:10]
quality(bamDatFiltered)[1:10]
position(bamDatFiltered)[1:10]
strand(bamDatFiltered)[1:10]
# What are the differences between the filtered and unfiltered BAM files?
length(position(bamDat))
length(position(bamDatFiltered))
# The unfiltered data has 64206 reads. The filtered data has 56883 reads.
# What are the chromosomes with aligned reads from the BAM file?
summary(chromosome(bamDat))
summary(chromosome(bamDatFiltered))
# Accessing Genome Annotations
chrSizes <- seqlengths(Dmelanogaster)
ensembl <- useMart("ensembl", dataset = "dmelanogaster_gene_ensembl")
annotation.fields <- c("ensembl_gene_id", "strand",
"chromosome_name", "start_position", "end_position")
gene.annotation <- getBM(annotation.fields, mart = ensembl,
filters = "chromosome_name", values = c("2L"))
str(gene.annotation)
levels(as.factor(gene.annotation$chromosome))
gene.annotation$chromosome <- paste("chr", gene.annotation$chromosome_name,
sep = "")
levels(as.factor(gene.annotation$chromosome))
gene.range <- RangedData(
IRanges(
start = gene.annotation$start_position,
end = gene.annotation$end_position),
space = gene.annotation$chromosome, strand = gene.annotation$strand,
gene = gene.annotation$ensembl_gene_id,
universe = "Dm3")
show(gene.range)
# Calculating Coverage
cover <- coverage(bamDatFiltered, width = chrSizes)
gene.coverage <- aggregate(cover[match(names(gene.range), names(cover))],
ranges(gene.range), sum)
gene.coverage <- ceiling(gene.coverage/unique(width(bamDat)))
length(gene.coverage[["chr2L"]])
length(ranges(gene.range)$chr2L)
countTable <- data.frame(chromosome = gene.range$space, gene_start = start(gene.range$ranges),
gene_end = end(gene.range$ranges), strand = gene.range$strand, gene = gene.range$gene,
count = as.vector(gene.coverage[["chr2L"]]))
dim(countTable)
head(countTable)
countTable <- data.frame(chromosome = gene.range$space, gene_start = start(gene.range$ranges),
gene_end = end(gene.range$ranges), strand = gene.range$strand, gene = gene.range$gene,
count = as.vector(gene.coverage[["chr2L"]]), RPKM = (as.vector(gene.coverage[["chr2L"]])
/ (end(gene.range$ranges) - start(gene.range$ranges)))
* (1e+09/length(bamDat)))
head(countTable)
# Take Home Problem
# Create a similar count table for all the exons located on chr2L.
exon.annotation <- getBM(
c("ensembl_exon_id", "strand", "chromosome_name",
"exon_chrom_start", "exon_chrom_end"),
mart = ensembl, filters = "chromosome_name", values = c("2L"))
str(exon.annotation)
exon.annotation$chromosome <- paste("chr", exon.annotation$chromosome_name,
sep = "")
exon.range <- RangedData(
IRanges(
start = exon.annotation$exon_chrom_start,
end = exon.annotation$exon_chrom_end),
space = exon.annotation$chromosome, strand = exon.annotation$strand,
exon = exon.annotation$ensembl_exon_id,
universe = "Dm3")
show(exon.range)
cover <- coverage(bamDatFiltered, width = chrSizes)
exon.coverage <- aggregate(cover[match(names(exon.range), names(cover))],
ranges(exon.range), sum)
exon.coverage <- ceiling(exon.coverage/unique(width(bamDat)))
length(exon.coverage[["chr2L"]])
length(ranges(exon.range)$chr2L)
countTable <- data.frame(chromosome = exon.range$space, exon_start = start(exon.range$ranges),
exon_end = end(exon.range$ranges), strand = exon.range$strand, exon = exon.range$exon,
count = as.vector(exon.coverage[["chr2L"]]))
dim(countTable)
head(countTable)
countTable <- data.frame(chromosome = exon.range$space, exon_start = start(exon.range$ranges),
exon_end = end(exon.range$ranges), strand = exon.range$strand, exon = exon.range$exon,
count = as.vector(exon.coverage[["chr2L"]]), RPKM = (as.vector(exon.coverage[["chr2L"]])
/ (end(exon.range$ranges) - start(exon.range$ranges)))
* (1e+09/length(bamDat)))
head(countTable)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment