Skip to content

Instantly share code, notes, and snippets.

@slavailn
Created January 17, 2018 22:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save slavailn/0e41c7342bc0151b633e174b1d61fe33 to your computer and use it in GitHub Desktop.
Save slavailn/0e41c7342bc0151b633e174b1d61fe33 to your computer and use it in GitHub Desktop.
annotate RRBS metilene regions
library(ChIPpeakAnno)
library(biomaRt)
library(genomation)
library(dplyr)
# Read in metilene results
res <- read.table("results_de_novo_all_10.txt", sep = "\t", header = T)
head(res)
# Select DMRs defined as segments with q.value < 0.05 and absolute methylation difference over 10%
# Add column threshold that assumes value TRUE in case of DMRS and FALSE for the rest of the segments
res <- res[with(res, order(abs(mean_difference), decreasing = T)),]
res$threshold <- as.factor(abs(res$mean_difference) > 10 & res$Q.value < 0.05)
# Get mart ensembl annotation
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org")
ensembl <- useDataset("hsapiens_gene_ensembl", mart=ensembl)
# Convert metilene results data frame to GRanges object and annoate with the nearest TSS
# using ChIPpeakAnno
res_annot <- annotatePeakInBatch(myPeakList = as(res, "GRanges"),
mart=ensembl,
featureType = "TSS",
output = "nearestLocation",
PeakLocForDistance = "middle",
select = "first",
ignore.strand = T)
# Add gene ids
res_annot <- addGeneIDs(res_annot, mart = ensembl, feature_id_type = "ensembl_gene_id",
IDs2Add = c("entrezgene", "hgnc_symbol", "description"))
# Read CpG island bed and add CpG island shores
cpg.obj <- genomation::readFeatureFlank(location="~/Projects/CRIO_Saliva_RRBS/cpg_island.bed",
feature.flank.name=c("CpGi","shores"))
# Annotate with CpG islands and shores
cgi_ann <- genomation::annotateWithFeatureFlank(res_annot, cpg.obj$CpGi,cpg.obj$shores,
feature.name="CpGi",flank.name="Shores")
cgi_mat <- genomation::getMembers(cgi_ann)
cgi_mat <- as.data.frame(cgi_mat)
res_annot <- as.data.frame(res_annot)
res_annot <- data.frame(res_annot, cgi_mat)
# Select annotated DMRs
dmrs_annot <- dplyr::filter(res_annot, threshold == TRUE)
dim(dmrs_annot)
# Write out annotated results
write.table(res_annot, file = "all_regions_annotated_10.txt", sep = "\t", quote = F, col.names = T, row.names = F)
write.table(dmrs_annot, file = "DMRs_annotated_10.txt", sep = "\t", quote = F, col.names = T, row.names = F)
# Calculate overlaps with common gene parts
gene.parts <- genomation::readTranscriptFeatures("hg38.bed")
dmrs_annot <- as(dmrs_annot, "GRanges")
dmrs_gene_parts <- genomation::annotateWithGeneParts(dmrs_annot, gene.parts)
dmrs_gene_parts
# Get overlap stats with exons, promoters, introns and intergenic sequences
target_annotations <- as.data.frame(getTargetAnnotationStats(dmrs_gene_parts, percentage=TRUE, precedence=TRUE))
names(target_annotations)[1] <- "percent_overlap"
target_annotations$genomic_parts <- rownames(target_annotations)
target_annotations <- melt(target_annotations, id.vars = "genomic_parts")
tiff("Percent_overlap_DMRs_with_features.tiff", width = 500, height = 500)
ggplot(target_annotations, aes(x=genomic_parts, y=value, fill=genomic_parts)) + geom_bar(stat = "identity") +
xlab("Genomic parts") +
ylab("Percent overlapping") +
ggtitle("Percent overlap of DMRs with genomic parts\nPrecedence was set as promoter > exon > intron") +
theme(axis.text=element_text(size=12), axis.title = element_text(size=14, face = "bold")) +
theme(legend.text=element_text(size=10, face = "bold")) +
theme(legend.title=element_text(size=12, face = "bold"))
dev.off()
# Plot histogram of distances to the colosest TSS
tiff("histogram_of_distances_to_closest_TSS.tiff", width = 500, height = 500)
ggplot(as.data.frame(dmrs_annot), aes(distancetoFeature)) + geom_histogram(colour = "white", fill = "darkblue") +
xlab("Distance to TSS") +
ylab("Count") +
ggtitle("Histogram of distances from DMRs to the closest\ntranscriptiona start sites") +
theme(axis.text=element_text(size=12), axis.title = element_text(size=14, face = "bold"))
dev.off()
hist(dmrs_annot$shortestDistance)
sum(dmrs_annot$CpGi)
sum(dmrs_annot$Shores)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment