Skip to content

Instantly share code, notes, and snippets.

@niekwit
Forked from slavailn/annotate_MACS2_peaks.R
Created August 30, 2023 14:22
Show Gist options
  • Save niekwit/7f9a4d2f0f231b4489ffb115f36e54e3 to your computer and use it in GitHub Desktop.
Save niekwit/7f9a4d2f0f231b4489ffb115f36e54e3 to your computer and use it in GitHub Desktop.
Annotate MACS2 peaks using ChIPpeakAnno
library("ChIPpeakAnno")
library("GenomicRanges")
library("org.At.tair.db")
library("TxDb.Athaliana.BioMart.plantsmart28")
library("biomaRt")
# Annotate genomic intervals in bed format using ChIPpeakAnno
# This script was designed for Arabidopsis, but can be easily changed for
# any other organism available through biomaRt
setwd("peaks_directory")
list.files()
annotateMacsPeaks <- function(bedFile)
{
# resFile <- path to resFile
# name of the out file that contains annotated results
# Read in results table
peaks <- read.table(peakFile, header=F, sep = "\t")
# Convert to GRanges
gr <- toGRanges(bedFile, format="narrowPeak", header = FALSE)
# Create GRanges object with annotations from TxDb database
annoData <- toGRanges(TxDb.Athaliana.BioMart.plantsmart28, feature="gene")
# Annotate granges with the nearest TSS
annot <- annotatePeakInBatch(gr,
AnnotationData=annoData,
featureType = "TSS",
output="nearestLocation",
PeakLocForDistance = "start")
# Load mart
ensembl <- useMart(biomart = "plants_mart",
dataset = "athaliana_eg_gene",
host = "plants.ensembl.org")
# Add gene information
annot <- addGeneIDs(annot, mart = ensembl, feature_id_type = "ensembl_gene_id",
IDs2Add = c("entrezgene", "tair_symbol", "description"))
samplename <- gsub("\\.\\w+", "", bedFile)
write.table(annot, paste(samplename, "_annotated.txt", sep = ""), sep = "\t", col.names = T,
row.names = F, quote = F)
}
files <- list.files()[grep(".narrowPeak", list.files())]
lapply( files, function(x) annotateMacsPeaks(x) )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment