Last active
November 17, 2022 17:49
-
-
Save LTLA/6fde8edf558a0d32314043ab59e253cf to your computer and use it in GitHub Desktop.
A script to count reads in introns and exons
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reading in the annotation. | |
library(TxDb.Mmusculus.UCSC.mm10.ensGene) | |
intronic <- intronicParts(TxDb.Mmusculus.UCSC.mm10.ensGene, linked.to.single.gene.only=TRUE) | |
exonic <- exonicParts(TxDb.Mmusculus.UCSC.mm10.ensGene, linked.to.single.gene.only=TRUE) | |
IX <- data.frame(GeneID=intronic$gene_id, Chr=as.character(seqnames(intronic)), | |
Start=start(intronic), End=end(intronic), Strand=strand(intronic), stringsAsFactors=FALSE) | |
EX <- data.frame(GeneID=exonic$gene_id, Chr=as.character(seqnames(exonic)), | |
Start=start(exonic), End=end(exonic), Strand=strand(exonic), stringsAsFactors=FALSE) | |
# Assigning them to genes. | |
library(Rsubread) | |
fname <- "head.bam" | |
output <- paste0(fname, ".featureCounts") | |
intron.counts <- featureCounts(fname, annot.ext=IX, reportReads="CORE", minOverlap=10) | |
out.intronic <- paste0(fname, ".intron") | |
file.rename(output, out.intronic) | |
exon.counts <- featureCounts(fname, annot.ext=EX, reportReads="CORE", minOverlap=10) | |
out.exonic <- paste0(fname, ".exon") | |
file.rename(output, out.exonic) | |
# Collating the results. | |
ugenes <- unique(c(unique(EX$GeneId), unique(IX$GeneID))) | |
out.exon <- out.intron <- out.both <- setNames(integer(length(ugenes)), ugenes) | |
fin <- file(out.intronic, open='r') | |
fex <- file(out.exonic, open='r') | |
repeat { | |
curin <- read.table(fin, nrow=100, colClasses=c("character", "character", "integer", "character"), stringsAsFactor=FALSE) | |
curex <- read.table(fex, nrow=100, colClasses=c("character", "character", "integer", "character"), stringsAsFactor=FALSE) | |
# Figuring out what to do with reads based on their mutual locations. | |
intron <- !is.na(curin[,4]) | |
exon <- !is.na(curex[,4]) | |
intron.only <- intron & !exon | |
exon.only <- exon & !intron | |
both <- intron & exon & curin[,4]==curex[,4] | |
curin.counts <- table(curin[intron.only,4]) | |
curex.counts <- table(curex[exon.only,4]) | |
both.counts <- table(curin[both,4]) | |
out.intron[names(curin.counts)] <- out.intron[names(curin.counts)] + curin.counts | |
out.exon[names(curex.counts)] <- out.exon[names(curex.counts)] + curex.counts | |
out.both[names(both.counts)] <- out.both[names(both.counts)] + both.counts | |
if (nrow(curin)<100) { | |
break | |
} | |
} | |
close(fex) | |
close(fin) | |
unlink(out.exonic) | |
unlink(out.intronic) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment