VivekTodur/annotate_bed_chippeakanno.R

## annotate_bed_chippeakanno.R
library("ChIPpeakAnno")
library("GenomicRanges")
library("biomaRt")
# Annotate genomic intervals in bed format using ChIPpeakAnno

setwd("~/Projects/grey_miseq/re-doing_hotspots/high_confidence_hotspots/")
list.files()

# Read in bed file with genomic intervals, ATAC-seq hotspots in this case
Q1 <- read.table("Q1_high_confidence.bed", header=F, sep = "\t")
names(Q1) <- c("chr", "start", "end")

# Convert to GRanges
gr1 <- makeGRangesFromDataFrame(Q1, ignore.strand = T, seqnames.field = "chr",
                                start.field = "start", end.field = "end")

# Get mouse TSS
data("TSS.mouse.GRCm38")

########################################################################################################
# Annotate Q1 hotspots with nearest TSS
Q1_annot <- annotatePeakInBatch(myPeakList = gr1,
                                AnnotationData = TSS.mouse.GRCm38,
                                featureType = "TSS",
                                output = "nearestLocation",
                                PeakLocForDistance = "middle",
                                select = "first",
                                ignore.strand = T)

# Load mart
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org")
ensembl <- useDataset("mmusculus_gene_ensembl", mart=ensembl)

Q1_annot <- addGeneIDs(Q1_annot, mart = ensembl, feature_id_type = "ensembl_gene_id",
                       IDs2Add = c("entrezgene", "mgi_symbol", "description"))
write.table(as.data.frame(Q1_annot), "Q1_high_confidence_annotated.txt", sep = "\t", col.names = T,
            row.names = F, quote = F)
	library("ChIPpeakAnno")
	library("GenomicRanges")
	library("biomaRt")
	# Annotate genomic intervals in bed format using ChIPpeakAnno

	setwd("~/Projects/grey_miseq/re-doing_hotspots/high_confidence_hotspots/")
	list.files()

	# Read in bed file with genomic intervals, ATAC-seq hotspots in this case
	Q1 <- read.table("Q1_high_confidence.bed", header=F, sep = "\t")
	names(Q1) <- c("chr", "start", "end")

	# Convert to GRanges
	gr1 <- makeGRangesFromDataFrame(Q1, ignore.strand = T, seqnames.field = "chr",
	start.field = "start", end.field = "end")

	# Get mouse TSS
	data("TSS.mouse.GRCm38")

	########################################################################################################
	# Annotate Q1 hotspots with nearest TSS
	Q1_annot <- annotatePeakInBatch(myPeakList = gr1,
	AnnotationData = TSS.mouse.GRCm38,
	featureType = "TSS",
	output = "nearestLocation",
	PeakLocForDistance = "middle",
	select = "first",
	ignore.strand = T)

	# Load mart
	ensembl <- useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org")
	ensembl <- useDataset("mmusculus_gene_ensembl", mart=ensembl)

	Q1_annot <- addGeneIDs(Q1_annot, mart = ensembl, feature_id_type = "ensembl_gene_id",
	IDs2Add = c("entrezgene", "mgi_symbol", "description"))
	write.table(as.data.frame(Q1_annot), "Q1_high_confidence_annotated.txt", sep = "\t", col.names = T,
	row.names = F, quote = F)