Skip to content

Instantly share code, notes, and snippets.

@lpantano
Created April 29, 2016 17:16
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save lpantano/c344593ff933f23648e2dde6af8be87b to your computer and use it in GitHub Desktop.
Save lpantano/c344593ff933f23648e2dde6af8be87b to your computer and use it in GitHub Desktop.
require(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
library(ChIPseeker)
library(GenomicRanges)
# Hack chipseeker function to add annotaiton
getGenomicAnnoStat <- function(peakAnno) {
if ( class(peakAnno) == "GRanges" )
peakAnno <- as.data.frame(peakAnno)
anno <- peakAnno$annotation
## anno <- sub(" \\(.+", "", anno)
anno[grep("exon 1 of", anno)] <- "1st Exon"
anno[grep("intron 1 of", anno)] <- "1st Intron"
anno[grep("Exon \\(", anno)] <- "Other Exon"
anno[grep("Intron \\(", anno)] <- "Other Intron"
anno[grep("Downstream", anno)] <- "Downstream (<=3kb)"
anno[grep("Promoter", anno)] <- "Promoter"
## count frequency
anno.table <- table(anno)
## calculate ratio
anno.ratio <- anno.table/ sum(anno.table) * 100
anno.df <- as.data.frame(anno.ratio)
colnames(anno.df) <- c("Feature", "Frequency")
anno.df$Numbers <- anno.table
lvs <- c(
"Promoter",
"CpG", #here new class
"5' UTR",
"3' UTR",
"1st Exon",
"Other Exon",
"1st Intron",
"Other Intron",
"Downstream (<=3kb)",
"Distal Intergenic",
"Others")
anno.df$Feature <- factor(anno.df$Feature, levels=lvs[lvs %in% anno.df$Feature])
anno.df <- anno.df[order(anno.df$Feature),]
return(anno.df)
}
# get CpG
cpg = readr::read_tsv("http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/cpgIslandExt.txt.gz", col_names = FALSE,progress = FALSE)
cpg_r = makeGRangesFromDataFrame(cpg,
keep.extra.columns = FALSE,
ignore.strand = TRUE, end.field = "X4",
start.field = "X3", seqnames.field = "X2")
# here we create the annotation
# df is a data frame with chrom and positions columns and anything else
df_r = makeGRangesFromDataFrame(df,
keep.extra.columns = TRUE,
ignore.strand = TRUE, end.field = "end",
start.field = "start", seqnames.field = "chr")
an = annotatePeak(df_r, TxDb = txdb, tssRegion = c(-1000,1000),
annoDb = "org.Hs.eg.db", verbose = FALSE)
idx = findOverlaps(an@anno, cpg_r)
an@detailGenomicAnnotation[,"cpg"] = FALSE
an@detailGenomicAnnotation[queryHits(idx),"cpg"] = TRUE
an@anno$annotation[queryHits(idx)] = "CpG"
.df = getGenomicAnnoStat(an@anno)
slot(an, "annoStat") <- .df
plotAnnoBar(an)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment