slavailn/edgeR_GLM_smallRNA.R

## edgeR_GLM_smallRNA.R
library(edgeR)

setwd(<dir>)

files <- list.files("/tag_counts/", full.names = T) # tab delimited file with sequenca tags and raw counts
files
d <- readDGE(files, columns = c(1,2))
counts <- d$counts
sampleFile <- <sampleFile>
sampleInfo <- read.table(sampleFile, sep = "\t", header = T, stringsAsFactors = F)
sample_layout <- sampleInfo
rownames(sample_layout) <- sample_layout$SampleID
counts <- counts[,match(rownames(sample_layout), colnames(counts))]

head(counts)
head(sampleInfo)

# Creat a compound variable (tissue:treatmentGroup:Age)
Group <- paste(sampleInfo$Tissue, sampleInfo$TreatmentGroup, sampleInfo$Age, sep=".")
Group
sampleInfo$Group <- Group

# Construct DGEList object
y <- DGEList(counts = counts, lib.size = colSums(counts), samples = sampleInfo,
              group = as.factor(sampleInfo$Group))

# Keep rows wher CPM is over 1 in at least 10 samples
# Change this as needed
keep <- rowSums(cpm(y)>1) >= 10
y <- y[keep, , keep.lib.sizes=FALSE]

# Recalculate library sized
y$samples$lib.size <- colSums(y$counts)

# Calculate normalization factors
# TMMwzp to accomodate data with lots of zeroes
y <- calcNormFactors(y, method = "TMMwzp")

# GLM approach
design <- model.matrix(~0+group, data=y$samples)
colnames(design) <- levels(y$samples$group)
design

y <- estimateDisp(y, design)
tiff("BCVPlot.tiff", width=500, height=500)
plotBCV(y)
dev.off()

fit <- glmQLFit(y, design)

# Make contrasts to compare stressed vs control in each of the ages and within tissues
my.contrasts <- makeContrasts(Motor_Cortex.Stressed.P20vsMotor_Cortex.Control.P20=Motor_Cortex.Stressed.P20-Motor_Cortex.Control.P20,
                              Visual_Cortex.Stressed.P20vsVisual_Cortex.Control.P20=Visual_Cortex.Stressed.P20-Visual_Cortex.Control.P20,
                              Motor_Cortex.Stressed.P35vsMotor_Cortex.Control.P35=Motor_Cortex.Stressed.P35-Motor_Cortex.Control.P35,
                              Visual_Cortex.Stressed.P35vsVisual_Cortex.Control.P35=Visual_Cortex.Stressed.P35-Visual_Cortex.Control.P35,
                              Motor_Cortex.Stressed.P50vsMotor_Cortex.Control.P50=Motor_Cortex.Stressed.P50-Motor_Cortex.Control.P50,
                              Visual_Cortex.Stressed.P50vsVisual_Cortex.Control.P50=Visual_Cortex.Stressed.P50-Visual_Cortex.Control.P50,
                              levels=levels(as.factor(y$samples$Group)))

################################################################################################################################
## Motor cortex P20
qlf.MC.P20 <- glmQLFTest(fit, contrast=my.contrasts[,"Motor_Cortex.Stressed.P20vsMotor_Cortex.Control.P20"])
topTags(qlf.MC.P20)
tiff("MotorCortex_P20_MAplot.tiff", width=500, height=500)
plotMD(qlf.MC.P20)
abline(h=c(-1,1), col="blue")
dev.off()

summary(decideTests(qlf.MC.P20))

cpms <- cpm(y)[,rownames(subset(y$samples, Group == "Motor_Cortex.Stressed.P20"  | Group == "Motor_Cortex.Control.P20"))]
de <- which(p.adjust(qlf.MC.P20$table$PValue, method="BH") < 0.05)
cpms <- cpms[de,]
dim(cpms)

sub <- subset(y$samples, Group == "Motor_Cortex.Stressed.P20"  | Group == "Motor_Cortex.Control.P20")
annot_df <- sub[,c(8, 6)]

tiff("Motor_cortex_P20_DE_smallRNAs_heatmap.tiff", width=500, height=800)
pheatmap(cpms, scale = "row", annotation_col = annot_df, fontsize_row=3)
dev.off()

# Attach CPM data and save the results
colnames(cpms) <- paste(colnames(cpms), sub$TreatmentGroup, sep="::")
res_table <- qlf.MC.P20$table
res_table$padj <- p.adjust(res_table$PValue, method = "BH")
res_table <- res_table[rownames(cpms),]
res_table <- data.frame(res_table, cpms)
write.table(res_table, file="res_name.txt", sep="\t", col.names = T, row.names = T, quote=F)
	library(edgeR)

	setwd(<dir>)

	files <- list.files("/tag_counts/", full.names = T) # tab delimited file with sequenca tags and raw counts
	files
	d <- readDGE(files, columns = c(1,2))
	counts <- d$counts
	sampleFile <- <sampleFile>
	sampleInfo <- read.table(sampleFile, sep = "\t", header = T, stringsAsFactors = F)
	sample_layout <- sampleInfo
	rownames(sample_layout) <- sample_layout$SampleID
	counts <- counts[,match(rownames(sample_layout), colnames(counts))]

	head(counts)
	head(sampleInfo)

	# Creat a compound variable (tissue:treatmentGroup:Age)
	Group <- paste(sampleInfo$Tissue, sampleInfo$TreatmentGroup, sampleInfo$Age, sep=".")
	Group
	sampleInfo$Group <- Group

	# Construct DGEList object
	y <- DGEList(counts = counts, lib.size = colSums(counts), samples = sampleInfo,
	group = as.factor(sampleInfo$Group))

	# Keep rows wher CPM is over 1 in at least 10 samples
	# Change this as needed
	keep <- rowSums(cpm(y)>1) >= 10
	y <- y[keep, , keep.lib.sizes=FALSE]

	# Recalculate library sized
	y$samples$lib.size <- colSums(y$counts)

	# Calculate normalization factors
	# TMMwzp to accomodate data with lots of zeroes
	y <- calcNormFactors(y, method = "TMMwzp")

	# GLM approach
	design <- model.matrix(~0+group, data=y$samples)
	colnames(design) <- levels(y$samples$group)
	design

	y <- estimateDisp(y, design)
	tiff("BCVPlot.tiff", width=500, height=500)
	plotBCV(y)
	dev.off()

	fit <- glmQLFit(y, design)

	# Make contrasts to compare stressed vs control in each of the ages and within tissues
	my.contrasts <- makeContrasts(Motor_Cortex.Stressed.P20vsMotor_Cortex.Control.P20=Motor_Cortex.Stressed.P20-Motor_Cortex.Control.P20,
	Visual_Cortex.Stressed.P20vsVisual_Cortex.Control.P20=Visual_Cortex.Stressed.P20-Visual_Cortex.Control.P20,
	Motor_Cortex.Stressed.P35vsMotor_Cortex.Control.P35=Motor_Cortex.Stressed.P35-Motor_Cortex.Control.P35,
	Visual_Cortex.Stressed.P35vsVisual_Cortex.Control.P35=Visual_Cortex.Stressed.P35-Visual_Cortex.Control.P35,
	Motor_Cortex.Stressed.P50vsMotor_Cortex.Control.P50=Motor_Cortex.Stressed.P50-Motor_Cortex.Control.P50,
	Visual_Cortex.Stressed.P50vsVisual_Cortex.Control.P50=Visual_Cortex.Stressed.P50-Visual_Cortex.Control.P50,
	levels=levels(as.factor(y$samples$Group)))

	################################################################################################################################
	## Motor cortex P20
	qlf.MC.P20 <- glmQLFTest(fit, contrast=my.contrasts[,"Motor_Cortex.Stressed.P20vsMotor_Cortex.Control.P20"])
	topTags(qlf.MC.P20)
	tiff("MotorCortex_P20_MAplot.tiff", width=500, height=500)
	plotMD(qlf.MC.P20)
	abline(h=c(-1,1), col="blue")
	dev.off()

	summary(decideTests(qlf.MC.P20))

	cpms <- cpm(y)[,rownames(subset(y$samples, Group == "Motor_Cortex.Stressed.P20" \| Group == "Motor_Cortex.Control.P20"))]
	de <- which(p.adjust(qlf.MC.P20$table$PValue, method="BH") < 0.05)
	cpms <- cpms[de,]
	dim(cpms)

	sub <- subset(y$samples, Group == "Motor_Cortex.Stressed.P20" \| Group == "Motor_Cortex.Control.P20")
	annot_df <- sub[,c(8, 6)]

	tiff("Motor_cortex_P20_DE_smallRNAs_heatmap.tiff", width=500, height=800)
	pheatmap(cpms, scale = "row", annotation_col = annot_df, fontsize_row=3)
	dev.off()

	# Attach CPM data and save the results
	colnames(cpms) <- paste(colnames(cpms), sub$TreatmentGroup, sep="::")
	res_table <- qlf.MC.P20$table
	res_table$padj <- p.adjust(res_table$PValue, method = "BH")
	res_table <- res_table[rownames(cpms),]
	res_table <- data.frame(res_table, cpms)
	write.table(res_table, file="res_name.txt", sep="\t", col.names = T, row.names = T, quote=F)