Skip to content

Instantly share code, notes, and snippets.

@LTLA
Last active November 17, 2022 17:49
Show Gist options
  • Save LTLA/6fde8edf558a0d32314043ab59e253cf to your computer and use it in GitHub Desktop.
Save LTLA/6fde8edf558a0d32314043ab59e253cf to your computer and use it in GitHub Desktop.
A script to count reads in introns and exons
# Reading in the annotation.
library(TxDb.Mmusculus.UCSC.mm10.ensGene)
intronic <- intronicParts(TxDb.Mmusculus.UCSC.mm10.ensGene, linked.to.single.gene.only=TRUE)
exonic <- exonicParts(TxDb.Mmusculus.UCSC.mm10.ensGene, linked.to.single.gene.only=TRUE)
IX <- data.frame(GeneID=intronic$gene_id, Chr=as.character(seqnames(intronic)),
Start=start(intronic), End=end(intronic), Strand=strand(intronic), stringsAsFactors=FALSE)
EX <- data.frame(GeneID=exonic$gene_id, Chr=as.character(seqnames(exonic)),
Start=start(exonic), End=end(exonic), Strand=strand(exonic), stringsAsFactors=FALSE)
# Assigning them to genes.
library(Rsubread)
fname <- "head.bam"
output <- paste0(fname, ".featureCounts")
intron.counts <- featureCounts(fname, annot.ext=IX, reportReads="CORE", minOverlap=10)
out.intronic <- paste0(fname, ".intron")
file.rename(output, out.intronic)
exon.counts <- featureCounts(fname, annot.ext=EX, reportReads="CORE", minOverlap=10)
out.exonic <- paste0(fname, ".exon")
file.rename(output, out.exonic)
# Collating the results.
ugenes <- unique(c(unique(EX$GeneId), unique(IX$GeneID)))
out.exon <- out.intron <- out.both <- setNames(integer(length(ugenes)), ugenes)
fin <- file(out.intronic, open='r')
fex <- file(out.exonic, open='r')
repeat {
curin <- read.table(fin, nrow=100, colClasses=c("character", "character", "integer", "character"), stringsAsFactor=FALSE)
curex <- read.table(fex, nrow=100, colClasses=c("character", "character", "integer", "character"), stringsAsFactor=FALSE)
# Figuring out what to do with reads based on their mutual locations.
intron <- !is.na(curin[,4])
exon <- !is.na(curex[,4])
intron.only <- intron & !exon
exon.only <- exon & !intron
both <- intron & exon & curin[,4]==curex[,4]
curin.counts <- table(curin[intron.only,4])
curex.counts <- table(curex[exon.only,4])
both.counts <- table(curin[both,4])
out.intron[names(curin.counts)] <- out.intron[names(curin.counts)] + curin.counts
out.exon[names(curex.counts)] <- out.exon[names(curex.counts)] + curex.counts
out.both[names(both.counts)] <- out.both[names(both.counts)] + both.counts
if (nrow(curin)<100) {
break
}
}
close(fex)
close(fin)
unlink(out.exonic)
unlink(out.intronic)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment