slavailn/Process_ChIP-seq_peaks.R

## Process_ChIP-seq_peaks.R
library(GenomicRanges)
library(ChIPpeakAnno)
library(genomation)
library(biomaRt)
library(ChIPseeker)
library(org.Mm.eg.db)
library(ReactomePA)
library(clusterProfiler)

setwd("<project_dir>")
list.files()

# Get MACS2 peaks
setwd("macs2_peaks/")

# Read in MACS2 peaks for each of the conditions
# Select and save peaks with FDR < 0.05
ct <- read.table("Ct/Ct_peaks.blacklist.filtered.bed", sep = "\t", header = F, stringsAsFactors = F)
names(ct) <- c("chr", "start", "end", "peak_id", "score")
ct <- toGRanges(ct)

il4 <- read.table("IL4/IL4_13_peaks.blacklist.filtered.bed", sep = "\t", header = F, stringsAsFactors = F)
names(il4) <- c("chr", "start", "end", "peak_id", "score")
il4 <- toGRanges(il4)

lps <- read.table("LPS_IL4/LPS_IL4_13_peaks.blacklist.filtered.bed", sep = "\t", header = F, stringsAsFactors = F)
names(lps) <- c("chr", "start", "end", "peak_id", "score")
lps <- toGRanges(lps)

###
# Read in SISSRs peaks for each of the conditions
setwd("../sissrs_peaks/")
list.files()
ct_sissr <- read.table("Ct.bed", sep = "\t", header = F, stringsAsFactors = F)
names(ct_sissr) <- c("chr", "start", "end", "pvalue")
ct_sissr <- toGRanges(ct_sissr)

il4_sissr <- read.table("IL4_13.bed", sep = "\t", header = F, stringsAsFactors = F)
names(il4_sissr) <- c("chr", "start", "end", "pvalue")
il4_sissr <- toGRanges(il4_sissr)

lps_sissr <- read.table("LPS_IL4_13.bed", sep = "\t", header = F, stringsAsFactors = F)
names(lps_sissr) <- c("chr", "start", "end", "pvalue")
lps_sissr <- toGRanges(lps_sissr)

# Check overlaps between MACS2 and SISSRs
tiff("Ct_MACS_SISSRs_overlap.tiff", width = 500, height = 500)
makeVennDiagram(list(ct, ct_sissr), NameOfPeaks = c("MACS14", "SISSRs"), main="Control")
dev.off()

tiff("IL4_MACS_SISSRs_overlap.tiff", width = 500, height = 500)
makeVennDiagram(list(il4, il4_sissr), NameOfPeaks = c("MACS14", "SISSRs"), main="IL4")
dev.off()

tiff("LPS_MACS_SISSRs_overlap.tiff", width = 500, height = 500)
makeVennDiagram(list(lps, lps_sissr), NameOfPeaks = c("MACS14", "SISSRs"), main="LPS_IL4")
dev.off()

# What are the overlaps between experimental groups when MACS14 are considered
tiff("MACS14_peaks_overlap.tiff", width = 500, height = 500)
makeVennDiagram(list(ct, il4, lps), NameOfPeaks = c("Control", "IL4", "LPS_IL4"), main="MACS14 peaks")
dev.off()

tiff("MACS14_CT_IL4_peaks_overlap.tiff", width = 500, height = 500)
makeVennDiagram(list(ct, il4), NameOfPeaks = c("Control", "IL4"), main="MACS14 peaks")
dev.off()

tiff("MACS14_CT_LPS_peaks_overlap.tiff", width = 500, height = 500)
makeVennDiagram(list(ct, lps), NameOfPeaks = c("Control", "LPS_IL4"), main="MACS14 peaks")
dev.off()

tiff("MACS14_IL4_LPS_peaks_overlap.tiff", width = 500, height = 500)
makeVennDiagram(list(il4, lps), NameOfPeaks = c("IL4", "LPS_IL4"), main="MACS14 peaks")
dev.off()

######################################################################################################
## Annotate peaks with ChiPpeakAnno
ensembl <- useMart("ensembl")
listDatasets(ensembl)[grep("Mouse", listDatasets(ensembl)$description),]
ensembl <- useDataset("mmusculus_gene_ensembl", mart=ensembl)

ct_annot <- annotatePeakInBatch(myPeakList = ct,
                                 mart=ensembl,
                                 featureType = "TSS",
                                 output = "nearestLocation",
                                 PeakLocForDistance = "middle",
                                 select = "first",
                                 ignore.strand = T)

# Add gene ids
ct_annot <- addGeneIDs(ct_annot, mart = ensembl, feature_id_type = "ensembl_gene_id",
                             IDs2Add = c("entrezgene", "mgi_symbol", "description"))
head(ct_annot)
write.table(ct_annot, file="Ct_annotated_peaks.txt", sep = "\t", col.names = T, row.names = F, quote = F)

il4_annot <- annotatePeakInBatch(myPeakList = il4,
                                mart=ensembl,
                                featureType = "TSS",
                                output = "nearestLocation",
                                PeakLocForDistance = "middle",
                                select = "first",
                                ignore.strand = T)

# Add gene ids
il4_annot <- addGeneIDs(il4_annot, mart = ensembl, feature_id_type = "ensembl_gene_id",
                       IDs2Add = c("entrezgene", "mgi_symbol", "description"))
head(il4_annot)
write.table(il4_annot, file="il4_annotated_peaks.txt", sep = "\t", col.names = T, row.names = F, quote = F)

lps_annot <- annotatePeakInBatch(myPeakList = lps,
                                 mart=ensembl,
                                 featureType = "TSS",
                                 output = "nearestLocation",
                                 PeakLocForDistance = "middle",
                                 select = "first",
                                 ignore.strand = T)

# Add gene ids
lps_annot <- addGeneIDs(lps_annot, mart = ensembl, feature_id_type = "ensembl_gene_id",
                        IDs2Add = c("entrezgene", "mgi_symbol", "description"))
head(lps_annot)
write.table(lps_annot, file="lps_annotated_peaks.txt", sep = "\t", col.names = T, row.names = F, quote = F)

##################################################################################################################
## Classify peaks by relation to genomic features (promoter, intronic, intergenic etc.)
## Aanalyze distribution of dictances to TSS from different datasets
## This will be done using ChipSeeker

## Create annotation data base from biomaRt
txdb <- makeTxDbFromBiomart(biomart="ENSEMBL_MART_ENSEMBL",
                            dataset="mmusculus_gene_ensembl",
                            transcript_ids=NULL,
                            circ_seqs=DEFAULT_CIRC_SEQS,
                            filter=NULL,
                            id_prefix="ensembl_",
                            host="www.ensembl.org",
                            port=80,
                            taxonomyId=NA,
                            miRBaseBuild=NA)
txdb

## Read peak files filetered from blacklisted regions
setwd("~/Projects/ChIP_CREB_BO_17AUG1018/macs2_peaks/")
list.files()
ct_path <- "Ct/Ct_peaks.blacklist.filtered.bed"
il4_path <- "IL4/IL4_13_peaks.blacklist.filtered.bed"
lps_path <- "LPS_IL4/LPS_IL4_13_peaks.blacklist.filtered.bed"

files <- list(ct_path, il4_path, lps_path)
files

# Create a list of peaks
peaks <- lapply(files, function(x) readPeakFile(x))
peaks
names(peaks) <- c("Ct", "IL4", "LPS_IL4")
names(peaks)

# Plot peak coverage over the chromosomes
pdf("Ct_peaks_over_chromosomed.pdf")
covplot(peaks[["Ct"]], weightCol="V5", title = "ChIP Peaks over Chromosomes in Control")
dev.off()

pdf("IL4_peaks_over_chromosomed.pdf")
covplot(peaks[["IL4"]], weightCol="V5", title = "ChIP Peaks over Chromosomes in IL4")
dev.off()

pdf("LPS_IL4_peaks_over_chromosomed.pdf")
covplot(peaks[["LPS_IL4"]], weightCol="V5", title = "ChIP Peaks over Chromosomes in LPS_IL4")
dev.off()

##########################################################################
# Profile ChIP peaks binding to TSS regions
promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
tagMatrix_CT <- getTagMatrix(peaks[["Ct"]], windows=promoter)
tagMatrix_IL4 <- getTagMatrix(peaks[["IL4"]], windows=promoter)
tagMatrix_LPS_IL4 <- getTagMatrix(peaks[["LPS_IL4"]], windows=promoter)

tiff("peak_profile_TSS.tiff", width = 600, height = 500)
par(mfrow=c(1,3))
tagHeatmap(tagMatrix_CT, xlim=c(-3000, 3000), color="red", title = "Control")
tagHeatmap(tagMatrix_IL4, xlim=c(-3000, 3000), color="red", title = "IL4")
tagHeatmap(tagMatrix_LPS_IL4, xlim=c(-3000, 3000), color="red", title = "LPS_IL4")
dev.off()

## Histogram of average distances to TSS, all samples overlayed
tiff("mean_distance_to_TSS_profile.tiff", width = 500, height = 500)
plotAvgProf(list(Control=tagMatrix_CT, IL4=tagMatrix_IL4, LPS_IL4=tagMatrix_LPS_IL4), xlim=c(-3000, 3000),
            xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency")
dev.off()

###########################################################################
# Peak annotation (by default promoter region is -3kb to +3kb.)
# Default region priority for the annotation (this can be changed genomicAnnotationPriority argument)
# Promoter
# 5’ UTR
# 3’ UTR
# Exon
# Intron
# Downstream
# Intergenic
# Compare multiple datasets with one command
names(files) <- c("Control", "IL4", "LPS_IL4")
peakAnnoList <- lapply(files, function(x) annotatePeak(x, TxDb=txdb, tssRegion=c(-3000, 3000),
                                                       annoDb="org.Mm.eg.db"))
peakAnnoList

# Plot annotation bars simultaneusly for multiple datasets
tiff("Genomic_Features_overlaps.tiff", width = 500, height = 500)
plotAnnoBar(peakAnnoList)
dev.off()

# Compare distances to TSS between peak lists
# Compare distribution of distances to TSS between datasets
tiff("distance_to_TSS_comparison.tiff", width = 500, height = 500)
plotDistToTSS(peakAnnoList)
dev.off()

# Save ChIPseeker annotations
write.table(as.data.frame(peakAnnoList[[1]]), file="Control_peaks_ChIPseeker_annotated.txt",
            sep = "\t", quote = F, col.names = T, row.names = F)
write.table(as.data.frame(peakAnnoList[[2]]), file="IL4_peaks_ChIPseeker_annotated.txt",
            sep = "\t", quote = F, col.names = T, row.names = F)
write.table(as.data.frame(peakAnnoList[[3]]), file="IL4_LPS_peaks_ChIPseeker_annotated.txt",
            sep = "\t", quote = F, col.names = T, row.names = F)

###############################################################################################
# Calculate GO and KEGG enrichment using clusterProfiler
ct_annot <- as.data.frame(peakAnnoList[[1]])
il4_annot <- as.data.frame(peakAnnoList[[2]])
lps_annot <- as.data.frame(peakAnnoList[[3]])

# Keep only peaks located within 3000 bp up- or down-stream to TSS
ct_annot <- ct_annot[abs(ct_annot$distanceToTSS) < 3000,]
il4_annot <- il4_annot[abs(il4_annot$distanceToTSS) < 3000,]
lps_annot <- lps_annot[abs(lps_annot$distanceToTSS) < 3000,]

# Calculate GO enrichment
ct_GO <-enrichGO(gene = ct_annot$SYMBOL,
                 OrgDb = org.Mm.eg.db,
                 keyType = "SYMBOL",
                 ont  = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff = 0.05,
                 qvalueCutoff = 0.1)

write.table(ct_GO@result, file = "Control_GO.txt", sep = "\t", col.names = T, row.names = F, quote = F)
tiff("Control_GO_dotplot.tiff", width = 600, height = 500)
dotplot(ct_GO, showCategory = 20)
dev.off()

il4_GO <-enrichGO(gene = il4_annot$SYMBOL,
                  OrgDb = org.Mm.eg.db,
                  keyType = "SYMBOL",
                  ont  = "BP",
                  pAdjustMethod = "BH",
                  pvalueCutoff = 0.05,
                  qvalueCutoff = 0.1)
write.table(il4_GO@result, file = "IL4_GO.txt", sep = "\t", col.names = T, row.names = F, quote = F)

tiff("IL4_GO_dotplot.tiff", width = 600, height = 500)
dotplot(il4_GO, showCategory = 20)
dev.off()

lps_GO <-enrichGO(gene = lps_annot$SYMBOL,
                  OrgDb = org.Mm.eg.db,
                  keyType = "SYMBOL",
                  ont  = "BP",
                  pAdjustMethod = "BH",
                  pvalueCutoff = 0.05,
                  qvalueCutoff = 0.1)
write.table(lps_GO@result, file = "LPS_IL4_GO.txt", sep = "\t", col.names = T, row.names = F, quote = F)

tiff("LPS_IL4_GO_dotplot.tiff", width = 600, height = 500)
dotplot(lps_GO, showCategory = 20)
dev.off()

########################################################################################################
## Calculate KEGG enrichment
setwd("../peak_analysis/")
ct_KEGG <- enrichKEGG(gene = ct_annot$ENTREZID,
                      organism = 'mmu',
                      pvalueCutoff = 0.05)
write.table(ct_KEGG@result, file = "CT_KEGG.txt", sep = "\t", col.names = T, row.names = F, quote = F)

tiff("CT_KEGG_dotplot.tiff", width = 600, height = 500)
dotplot(ct_KEGG, showCategory = 20)
dev.off()

il4_KEGG <- enrichKEGG(gene = il4_annot$ENTREZID,
                       organism = 'mmu',
                       pvalueCutoff = 0.05)
write.table(il4_KEGG@result, file = "IL4_KEGG.txt", sep = "\t", col.names = T, row.names = F, quote = F)

tiff("IL4_KEGG_dotplot.tiff", width = 600, height = 500)
dotplot(il4_KEGG, showCategory = 20)
dev.off()

lps_KEGG <- enrichKEGG(gene = lps_annot$ENTREZID,
                       organism = 'mmu',
                       pvalueCutoff = 0.05)
write.table(lps_KEGG@result, file = "LPS_IL4_KEGG.txt", sep = "\t", col.names = T, row.names = F, quote = F)

tiff("LPS_IL4_KEGG_dotplot.tiff", width = 600, height = 500)
dotplot(lps_KEGG, showCategory = 20)
dev.off()
#############################################################################################################
# Calculate enrichment for reactomePA
reactome_ct <- enrichPathway(ct_annot$ENTREZID, organism="mouse", pvalueCutoff = 0.1, pAdjustMethod = "BH", minGSSize = 10,
                             maxGSSize = 500)
write.table(reactome_ct@result, file="CT_Reactome_PA.txt", sep = "\t", col.names = T, row.names = F,
            quote = F)
tiff("CT_Reactome.tiff", width = 600, height = 500)
dotplot(reactome_ct, showCategory = 20)
dev.off()

reactome_il4 <- enrichPathway(il4_annot$ENTREZID, organism="mouse", pvalueCutoff = 0.1, pAdjustMethod = "BH", minGSSize = 10,
                              maxGSSize = 500)
write.table(reactome_il4@result, file="IL4_Reactome_PA.txt", sep = "\t", col.names = T, row.names = F,
            quote = F)
tiff("IL4_Reactome.tiff", width = 600, height = 500)
dotplot(reactome_il4, showCategory = 20)
dev.off()

reactome_lps <- enrichPathway(lps_annot$ENTREZID, organism="mouse", pvalueCutoff = 0.1, pAdjustMethod = "BH", minGSSize = 10,
                              maxGSSize = 500)
write.table(reactome_lps@result, file="LPS_IL4_Reactome_PA.txt", sep = "\t", col.names = T, row.names = F,
            quote = F)
tiff("LPS_IL4_Reactome.tiff", width = 600, height = 500)
dotplot(reactome_lps, showCategory = 20)
dev.off()


#################################################################################################################
# Compare biological themes (GO and pathway enrichment among peak lists)
# Create list of entrez IDs for each of the experiments (promoter associated --> abs(distanceToTSS < 3000))
id_list <- list(Control=ct_annot$ENTREZID, IL4=il4_annot$ENTREZID, LPS_IL4=lps_annot$ENTREZID)
compKEGG <- compareCluster(geneCluster   = id_list,
                           fun           = "enrichKEGG",
                           pvalueCutoff  = 0.05,
                           pAdjustMethod = "BH",
                           organism = "mmu")
tiff("compare_enrichment_KEGG.tiff", width = 600, height = 500)
dotplot(compKEGG, showCategory = 20, title = "KEGG Pathway Enrichment Comparison")
dev.off()

compGO <- compareCluster(geneClusters = id_list,
                         fun = "enrichGO",
                         qvalueCutoff = 0.05,
                         pAdjustMethod = "BH",
                         OrgDb = org.Mm.eg.db,
                         ont = "BP")
tiff("compare_enrichment_GO.tiff", width = 1200, height = 600)
dotplot(compGO, showCategory = 20, title = "GO  Enrichment Comparison")
dev.off()
	library(GenomicRanges)
	library(ChIPpeakAnno)
	library(genomation)
	library(biomaRt)
	library(ChIPseeker)
	library(org.Mm.eg.db)
	library(ReactomePA)
	library(clusterProfiler)

	setwd("<project_dir>")
	list.files()

	# Get MACS2 peaks
	setwd("macs2_peaks/")

	# Read in MACS2 peaks for each of the conditions
	# Select and save peaks with FDR < 0.05
	ct <- read.table("Ct/Ct_peaks.blacklist.filtered.bed", sep = "\t", header = F, stringsAsFactors = F)
	names(ct) <- c("chr", "start", "end", "peak_id", "score")
	ct <- toGRanges(ct)

	il4 <- read.table("IL4/IL4_13_peaks.blacklist.filtered.bed", sep = "\t", header = F, stringsAsFactors = F)
	names(il4) <- c("chr", "start", "end", "peak_id", "score")
	il4 <- toGRanges(il4)

	lps <- read.table("LPS_IL4/LPS_IL4_13_peaks.blacklist.filtered.bed", sep = "\t", header = F, stringsAsFactors = F)
	names(lps) <- c("chr", "start", "end", "peak_id", "score")
	lps <- toGRanges(lps)

	###
	# Read in SISSRs peaks for each of the conditions
	setwd("../sissrs_peaks/")
	list.files()
	ct_sissr <- read.table("Ct.bed", sep = "\t", header = F, stringsAsFactors = F)
	names(ct_sissr) <- c("chr", "start", "end", "pvalue")
	ct_sissr <- toGRanges(ct_sissr)

	il4_sissr <- read.table("IL4_13.bed", sep = "\t", header = F, stringsAsFactors = F)
	names(il4_sissr) <- c("chr", "start", "end", "pvalue")
	il4_sissr <- toGRanges(il4_sissr)

	lps_sissr <- read.table("LPS_IL4_13.bed", sep = "\t", header = F, stringsAsFactors = F)
	names(lps_sissr) <- c("chr", "start", "end", "pvalue")
	lps_sissr <- toGRanges(lps_sissr)

	# Check overlaps between MACS2 and SISSRs
	tiff("Ct_MACS_SISSRs_overlap.tiff", width = 500, height = 500)
	makeVennDiagram(list(ct, ct_sissr), NameOfPeaks = c("MACS14", "SISSRs"), main="Control")
	dev.off()

	tiff("IL4_MACS_SISSRs_overlap.tiff", width = 500, height = 500)
	makeVennDiagram(list(il4, il4_sissr), NameOfPeaks = c("MACS14", "SISSRs"), main="IL4")
	dev.off()

	tiff("LPS_MACS_SISSRs_overlap.tiff", width = 500, height = 500)
	makeVennDiagram(list(lps, lps_sissr), NameOfPeaks = c("MACS14", "SISSRs"), main="LPS_IL4")
	dev.off()

	# What are the overlaps between experimental groups when MACS14 are considered
	tiff("MACS14_peaks_overlap.tiff", width = 500, height = 500)
	makeVennDiagram(list(ct, il4, lps), NameOfPeaks = c("Control", "IL4", "LPS_IL4"), main="MACS14 peaks")
	dev.off()

	tiff("MACS14_CT_IL4_peaks_overlap.tiff", width = 500, height = 500)
	makeVennDiagram(list(ct, il4), NameOfPeaks = c("Control", "IL4"), main="MACS14 peaks")
	dev.off()

	tiff("MACS14_CT_LPS_peaks_overlap.tiff", width = 500, height = 500)
	makeVennDiagram(list(ct, lps), NameOfPeaks = c("Control", "LPS_IL4"), main="MACS14 peaks")
	dev.off()

	tiff("MACS14_IL4_LPS_peaks_overlap.tiff", width = 500, height = 500)
	makeVennDiagram(list(il4, lps), NameOfPeaks = c("IL4", "LPS_IL4"), main="MACS14 peaks")
	dev.off()

	######################################################################################################
	## Annotate peaks with ChiPpeakAnno
	ensembl <- useMart("ensembl")
	listDatasets(ensembl)[grep("Mouse", listDatasets(ensembl)$description),]
	ensembl <- useDataset("mmusculus_gene_ensembl", mart=ensembl)

	ct_annot <- annotatePeakInBatch(myPeakList = ct,
	mart=ensembl,
	featureType = "TSS",
	output = "nearestLocation",
	PeakLocForDistance = "middle",
	select = "first",
	ignore.strand = T)

	# Add gene ids
	ct_annot <- addGeneIDs(ct_annot, mart = ensembl, feature_id_type = "ensembl_gene_id",
	IDs2Add = c("entrezgene", "mgi_symbol", "description"))
	head(ct_annot)
	write.table(ct_annot, file="Ct_annotated_peaks.txt", sep = "\t", col.names = T, row.names = F, quote = F)

	il4_annot <- annotatePeakInBatch(myPeakList = il4,
	mart=ensembl,
	featureType = "TSS",
	output = "nearestLocation",
	PeakLocForDistance = "middle",
	select = "first",
	ignore.strand = T)

	# Add gene ids
	il4_annot <- addGeneIDs(il4_annot, mart = ensembl, feature_id_type = "ensembl_gene_id",
	IDs2Add = c("entrezgene", "mgi_symbol", "description"))
	head(il4_annot)
	write.table(il4_annot, file="il4_annotated_peaks.txt", sep = "\t", col.names = T, row.names = F, quote = F)

	lps_annot <- annotatePeakInBatch(myPeakList = lps,
	mart=ensembl,
	featureType = "TSS",
	output = "nearestLocation",
	PeakLocForDistance = "middle",
	select = "first",
	ignore.strand = T)

	# Add gene ids
	lps_annot <- addGeneIDs(lps_annot, mart = ensembl, feature_id_type = "ensembl_gene_id",
	IDs2Add = c("entrezgene", "mgi_symbol", "description"))
	head(lps_annot)
	write.table(lps_annot, file="lps_annotated_peaks.txt", sep = "\t", col.names = T, row.names = F, quote = F)

	##################################################################################################################
	## Classify peaks by relation to genomic features (promoter, intronic, intergenic etc.)
	## Aanalyze distribution of dictances to TSS from different datasets
	## This will be done using ChipSeeker

	## Create annotation data base from biomaRt
	txdb <- makeTxDbFromBiomart(biomart="ENSEMBL_MART_ENSEMBL",
	dataset="mmusculus_gene_ensembl",
	transcript_ids=NULL,
	circ_seqs=DEFAULT_CIRC_SEQS,
	filter=NULL,
	id_prefix="ensembl_",
	host="www.ensembl.org",
	port=80,
	taxonomyId=NA,
	miRBaseBuild=NA)
	txdb

	## Read peak files filetered from blacklisted regions
	setwd("~/Projects/ChIP_CREB_BO_17AUG1018/macs2_peaks/")
	list.files()
	ct_path <- "Ct/Ct_peaks.blacklist.filtered.bed"
	il4_path <- "IL4/IL4_13_peaks.blacklist.filtered.bed"
	lps_path <- "LPS_IL4/LPS_IL4_13_peaks.blacklist.filtered.bed"

	files <- list(ct_path, il4_path, lps_path)
	files

	# Create a list of peaks
	peaks <- lapply(files, function(x) readPeakFile(x))
	peaks
	names(peaks) <- c("Ct", "IL4", "LPS_IL4")
	names(peaks)

	# Plot peak coverage over the chromosomes
	pdf("Ct_peaks_over_chromosomed.pdf")
	covplot(peaks[["Ct"]], weightCol="V5", title = "ChIP Peaks over Chromosomes in Control")
	dev.off()

	pdf("IL4_peaks_over_chromosomed.pdf")
	covplot(peaks[["IL4"]], weightCol="V5", title = "ChIP Peaks over Chromosomes in IL4")
	dev.off()

	pdf("LPS_IL4_peaks_over_chromosomed.pdf")
	covplot(peaks[["LPS_IL4"]], weightCol="V5", title = "ChIP Peaks over Chromosomes in LPS_IL4")
	dev.off()

	##########################################################################
	# Profile ChIP peaks binding to TSS regions
	promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
	tagMatrix_CT <- getTagMatrix(peaks[["Ct"]], windows=promoter)
	tagMatrix_IL4 <- getTagMatrix(peaks[["IL4"]], windows=promoter)
	tagMatrix_LPS_IL4 <- getTagMatrix(peaks[["LPS_IL4"]], windows=promoter)

	tiff("peak_profile_TSS.tiff", width = 600, height = 500)
	par(mfrow=c(1,3))
	tagHeatmap(tagMatrix_CT, xlim=c(-3000, 3000), color="red", title = "Control")
	tagHeatmap(tagMatrix_IL4, xlim=c(-3000, 3000), color="red", title = "IL4")
	tagHeatmap(tagMatrix_LPS_IL4, xlim=c(-3000, 3000), color="red", title = "LPS_IL4")
	dev.off()

	## Histogram of average distances to TSS, all samples overlayed
	tiff("mean_distance_to_TSS_profile.tiff", width = 500, height = 500)
	plotAvgProf(list(Control=tagMatrix_CT, IL4=tagMatrix_IL4, LPS_IL4=tagMatrix_LPS_IL4), xlim=c(-3000, 3000),
	xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency")
	dev.off()

	###########################################################################
	# Peak annotation (by default promoter region is -3kb to +3kb.)
	# Default region priority for the annotation (this can be changed genomicAnnotationPriority argument)
	# Promoter
	# 5’ UTR
	# 3’ UTR
	# Exon
	# Intron
	# Downstream
	# Intergenic
	# Compare multiple datasets with one command
	names(files) <- c("Control", "IL4", "LPS_IL4")
	peakAnnoList <- lapply(files, function(x) annotatePeak(x, TxDb=txdb, tssRegion=c(-3000, 3000),
	annoDb="org.Mm.eg.db"))
	peakAnnoList

	# Plot annotation bars simultaneusly for multiple datasets
	tiff("Genomic_Features_overlaps.tiff", width = 500, height = 500)
	plotAnnoBar(peakAnnoList)
	dev.off()

	# Compare distances to TSS between peak lists
	# Compare distribution of distances to TSS between datasets
	tiff("distance_to_TSS_comparison.tiff", width = 500, height = 500)
	plotDistToTSS(peakAnnoList)
	dev.off()

	# Save ChIPseeker annotations
	write.table(as.data.frame(peakAnnoList[[1]]), file="Control_peaks_ChIPseeker_annotated.txt",
	sep = "\t", quote = F, col.names = T, row.names = F)
	write.table(as.data.frame(peakAnnoList[[2]]), file="IL4_peaks_ChIPseeker_annotated.txt",
	sep = "\t", quote = F, col.names = T, row.names = F)
	write.table(as.data.frame(peakAnnoList[[3]]), file="IL4_LPS_peaks_ChIPseeker_annotated.txt",
	sep = "\t", quote = F, col.names = T, row.names = F)

	###############################################################################################
	# Calculate GO and KEGG enrichment using clusterProfiler
	ct_annot <- as.data.frame(peakAnnoList[[1]])
	il4_annot <- as.data.frame(peakAnnoList[[2]])
	lps_annot <- as.data.frame(peakAnnoList[[3]])

	# Keep only peaks located within 3000 bp up- or down-stream to TSS
	ct_annot <- ct_annot[abs(ct_annot$distanceToTSS) < 3000,]
	il4_annot <- il4_annot[abs(il4_annot$distanceToTSS) < 3000,]
	lps_annot <- lps_annot[abs(lps_annot$distanceToTSS) < 3000,]

	# Calculate GO enrichment
	ct_GO <-enrichGO(gene = ct_annot$SYMBOL,
	OrgDb = org.Mm.eg.db,
	keyType = "SYMBOL",
	ont = "BP",
	pAdjustMethod = "BH",
	pvalueCutoff = 0.05,
	qvalueCutoff = 0.1)

	write.table(ct_GO@result, file = "Control_GO.txt", sep = "\t", col.names = T, row.names = F, quote = F)
	tiff("Control_GO_dotplot.tiff", width = 600, height = 500)
	dotplot(ct_GO, showCategory = 20)
	dev.off()

	il4_GO <-enrichGO(gene = il4_annot$SYMBOL,
	OrgDb = org.Mm.eg.db,
	keyType = "SYMBOL",
	ont = "BP",
	pAdjustMethod = "BH",
	pvalueCutoff = 0.05,
	qvalueCutoff = 0.1)
	write.table(il4_GO@result, file = "IL4_GO.txt", sep = "\t", col.names = T, row.names = F, quote = F)

	tiff("IL4_GO_dotplot.tiff", width = 600, height = 500)
	dotplot(il4_GO, showCategory = 20)
	dev.off()

	lps_GO <-enrichGO(gene = lps_annot$SYMBOL,
	OrgDb = org.Mm.eg.db,
	keyType = "SYMBOL",
	ont = "BP",
	pAdjustMethod = "BH",
	pvalueCutoff = 0.05,
	qvalueCutoff = 0.1)
	write.table(lps_GO@result, file = "LPS_IL4_GO.txt", sep = "\t", col.names = T, row.names = F, quote = F)

	tiff("LPS_IL4_GO_dotplot.tiff", width = 600, height = 500)
	dotplot(lps_GO, showCategory = 20)
	dev.off()

	########################################################################################################
	## Calculate KEGG enrichment
	setwd("../peak_analysis/")
	ct_KEGG <- enrichKEGG(gene = ct_annot$ENTREZID,
	organism = 'mmu',
	pvalueCutoff = 0.05)
	write.table(ct_KEGG@result, file = "CT_KEGG.txt", sep = "\t", col.names = T, row.names = F, quote = F)

	tiff("CT_KEGG_dotplot.tiff", width = 600, height = 500)
	dotplot(ct_KEGG, showCategory = 20)
	dev.off()

	il4_KEGG <- enrichKEGG(gene = il4_annot$ENTREZID,
	organism = 'mmu',
	pvalueCutoff = 0.05)
	write.table(il4_KEGG@result, file = "IL4_KEGG.txt", sep = "\t", col.names = T, row.names = F, quote = F)

	tiff("IL4_KEGG_dotplot.tiff", width = 600, height = 500)
	dotplot(il4_KEGG, showCategory = 20)
	dev.off()

	lps_KEGG <- enrichKEGG(gene = lps_annot$ENTREZID,
	organism = 'mmu',
	pvalueCutoff = 0.05)
	write.table(lps_KEGG@result, file = "LPS_IL4_KEGG.txt", sep = "\t", col.names = T, row.names = F, quote = F)

	tiff("LPS_IL4_KEGG_dotplot.tiff", width = 600, height = 500)
	dotplot(lps_KEGG, showCategory = 20)
	dev.off()
	#############################################################################################################
	# Calculate enrichment for reactomePA
	reactome_ct <- enrichPathway(ct_annot$ENTREZID, organism="mouse", pvalueCutoff = 0.1, pAdjustMethod = "BH", minGSSize = 10,
	maxGSSize = 500)
	write.table(reactome_ct@result, file="CT_Reactome_PA.txt", sep = "\t", col.names = T, row.names = F,
	quote = F)
	tiff("CT_Reactome.tiff", width = 600, height = 500)
	dotplot(reactome_ct, showCategory = 20)
	dev.off()

	reactome_il4 <- enrichPathway(il4_annot$ENTREZID, organism="mouse", pvalueCutoff = 0.1, pAdjustMethod = "BH", minGSSize = 10,
	maxGSSize = 500)
	write.table(reactome_il4@result, file="IL4_Reactome_PA.txt", sep = "\t", col.names = T, row.names = F,
	quote = F)
	tiff("IL4_Reactome.tiff", width = 600, height = 500)
	dotplot(reactome_il4, showCategory = 20)
	dev.off()

	reactome_lps <- enrichPathway(lps_annot$ENTREZID, organism="mouse", pvalueCutoff = 0.1, pAdjustMethod = "BH", minGSSize = 10,
	maxGSSize = 500)
	write.table(reactome_lps@result, file="LPS_IL4_Reactome_PA.txt", sep = "\t", col.names = T, row.names = F,
	quote = F)
	tiff("LPS_IL4_Reactome.tiff", width = 600, height = 500)
	dotplot(reactome_lps, showCategory = 20)
	dev.off()


	#################################################################################################################
	# Compare biological themes (GO and pathway enrichment among peak lists)
	# Create list of entrez IDs for each of the experiments (promoter associated --> abs(distanceToTSS < 3000))
	id_list <- list(Control=ct_annot$ENTREZID, IL4=il4_annot$ENTREZID, LPS_IL4=lps_annot$ENTREZID)
	compKEGG <- compareCluster(geneCluster = id_list,
	fun = "enrichKEGG",
	pvalueCutoff = 0.05,
	pAdjustMethod = "BH",
	organism = "mmu")
	tiff("compare_enrichment_KEGG.tiff", width = 600, height = 500)
	dotplot(compKEGG, showCategory = 20, title = "KEGG Pathway Enrichment Comparison")
	dev.off()

	compGO <- compareCluster(geneClusters = id_list,
	fun = "enrichGO",
	qvalueCutoff = 0.05,
	pAdjustMethod = "BH",
	OrgDb = org.Mm.eg.db,
	ont = "BP")
	tiff("compare_enrichment_GO.tiff", width = 1200, height = 600)
	dotplot(compGO, showCategory = 20, title = "GO Enrichment Comparison")
	dev.off()