require(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene | |
library(ChIPseeker) | |
library(GenomicRanges) | |
# Hack chipseeker function to add annotaiton | |
getGenomicAnnoStat <- function(peakAnno) { | |
if ( class(peakAnno) == "GRanges" ) | |
peakAnno <- as.data.frame(peakAnno) | |
anno <- peakAnno$annotation | |
## anno <- sub(" \\(.+", "", anno) | |
anno[grep("exon 1 of", anno)] <- "1st Exon" | |
anno[grep("intron 1 of", anno)] <- "1st Intron" | |
anno[grep("Exon \\(", anno)] <- "Other Exon" | |
anno[grep("Intron \\(", anno)] <- "Other Intron" | |
anno[grep("Downstream", anno)] <- "Downstream (<=3kb)" | |
anno[grep("Promoter", anno)] <- "Promoter" | |
## count frequency | |
anno.table <- table(anno) | |
## calculate ratio | |
anno.ratio <- anno.table/ sum(anno.table) * 100 | |
anno.df <- as.data.frame(anno.ratio) | |
colnames(anno.df) <- c("Feature", "Frequency") | |
anno.df$Numbers <- anno.table | |
lvs <- c( | |
"Promoter", | |
"CpG", #here new class | |
"5' UTR", | |
"3' UTR", | |
"1st Exon", | |
"Other Exon", | |
"1st Intron", | |
"Other Intron", | |
"Downstream (<=3kb)", | |
"Distal Intergenic", | |
"Others") | |
anno.df$Feature <- factor(anno.df$Feature, levels=lvs[lvs %in% anno.df$Feature]) | |
anno.df <- anno.df[order(anno.df$Feature),] | |
return(anno.df) | |
} | |
# get CpG | |
cpg = readr::read_tsv("http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/cpgIslandExt.txt.gz", col_names = FALSE,progress = FALSE) | |
cpg_r = makeGRangesFromDataFrame(cpg, | |
keep.extra.columns = FALSE, | |
ignore.strand = TRUE, end.field = "X4", | |
start.field = "X3", seqnames.field = "X2") | |
# here we create the annotation | |
# df is a data frame with chrom and positions columns and anything else | |
df_r = makeGRangesFromDataFrame(df, | |
keep.extra.columns = TRUE, | |
ignore.strand = TRUE, end.field = "end", | |
start.field = "start", seqnames.field = "chr") | |
an = annotatePeak(df_r, TxDb = txdb, tssRegion = c(-1000,1000), | |
annoDb = "org.Hs.eg.db", verbose = FALSE) | |
idx = findOverlaps(an@anno, cpg_r) | |
an@detailGenomicAnnotation[,"cpg"] = FALSE | |
an@detailGenomicAnnotation[queryHits(idx),"cpg"] = TRUE | |
an@anno$annotation[queryHits(idx)] = "CpG" | |
.df = getGenomicAnnoStat(an@anno) | |
slot(an, "annoStat") <- .df | |
plotAnnoBar(an) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment