Skip to content

Instantly share code, notes, and snippets.

@slavailn
Created February 20, 2019 23:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slavailn/471b94a01fc5e0dc202ad2d9805fa135 to your computer and use it in GitHub Desktop.
Save slavailn/471b94a01fc5e0dc202ad2d9805fa135 to your computer and use it in GitHub Desktop.
Edger GLM approach with 2 treatments over 3 time-points on small RNA tags
library(edgeR)
setwd(<dir>)
files <- list.files("/tag_counts/", full.names = T) # tab delimited file with sequenca tags and raw counts
files
d <- readDGE(files, columns = c(1,2))
counts <- d$counts
sampleFile <- <sampleFile>
sampleInfo <- read.table(sampleFile, sep = "\t", header = T, stringsAsFactors = F)
sample_layout <- sampleInfo
rownames(sample_layout) <- sample_layout$SampleID
counts <- counts[,match(rownames(sample_layout), colnames(counts))]
head(counts)
head(sampleInfo)
# Creat a compound variable (tissue:treatmentGroup:Age)
Group <- paste(sampleInfo$Tissue, sampleInfo$TreatmentGroup, sampleInfo$Age, sep=".")
Group
sampleInfo$Group <- Group
# Construct DGEList object
y <- DGEList(counts = counts, lib.size = colSums(counts), samples = sampleInfo,
group = as.factor(sampleInfo$Group))
# Keep rows wher CPM is over 1 in at least 10 samples
# Change this as needed
keep <- rowSums(cpm(y)>1) >= 10
y <- y[keep, , keep.lib.sizes=FALSE]
# Recalculate library sized
y$samples$lib.size <- colSums(y$counts)
# Calculate normalization factors
# TMMwzp to accomodate data with lots of zeroes
y <- calcNormFactors(y, method = "TMMwzp")
# GLM approach
design <- model.matrix(~0+group, data=y$samples)
colnames(design) <- levels(y$samples$group)
design
y <- estimateDisp(y, design)
tiff("BCVPlot.tiff", width=500, height=500)
plotBCV(y)
dev.off()
fit <- glmQLFit(y, design)
# Make contrasts to compare stressed vs control in each of the ages and within tissues
my.contrasts <- makeContrasts(Motor_Cortex.Stressed.P20vsMotor_Cortex.Control.P20=Motor_Cortex.Stressed.P20-Motor_Cortex.Control.P20,
Visual_Cortex.Stressed.P20vsVisual_Cortex.Control.P20=Visual_Cortex.Stressed.P20-Visual_Cortex.Control.P20,
Motor_Cortex.Stressed.P35vsMotor_Cortex.Control.P35=Motor_Cortex.Stressed.P35-Motor_Cortex.Control.P35,
Visual_Cortex.Stressed.P35vsVisual_Cortex.Control.P35=Visual_Cortex.Stressed.P35-Visual_Cortex.Control.P35,
Motor_Cortex.Stressed.P50vsMotor_Cortex.Control.P50=Motor_Cortex.Stressed.P50-Motor_Cortex.Control.P50,
Visual_Cortex.Stressed.P50vsVisual_Cortex.Control.P50=Visual_Cortex.Stressed.P50-Visual_Cortex.Control.P50,
levels=levels(as.factor(y$samples$Group)))
################################################################################################################################
## Motor cortex P20
qlf.MC.P20 <- glmQLFTest(fit, contrast=my.contrasts[,"Motor_Cortex.Stressed.P20vsMotor_Cortex.Control.P20"])
topTags(qlf.MC.P20)
tiff("MotorCortex_P20_MAplot.tiff", width=500, height=500)
plotMD(qlf.MC.P20)
abline(h=c(-1,1), col="blue")
dev.off()
summary(decideTests(qlf.MC.P20))
cpms <- cpm(y)[,rownames(subset(y$samples, Group == "Motor_Cortex.Stressed.P20" | Group == "Motor_Cortex.Control.P20"))]
de <- which(p.adjust(qlf.MC.P20$table$PValue, method="BH") < 0.05)
cpms <- cpms[de,]
dim(cpms)
sub <- subset(y$samples, Group == "Motor_Cortex.Stressed.P20" | Group == "Motor_Cortex.Control.P20")
annot_df <- sub[,c(8, 6)]
tiff("Motor_cortex_P20_DE_smallRNAs_heatmap.tiff", width=500, height=800)
pheatmap(cpms, scale = "row", annotation_col = annot_df, fontsize_row=3)
dev.off()
# Attach CPM data and save the results
colnames(cpms) <- paste(colnames(cpms), sub$TreatmentGroup, sep="::")
res_table <- qlf.MC.P20$table
res_table$padj <- p.adjust(res_table$PValue, method = "BH")
res_table <- res_table[rownames(cpms),]
res_table <- data.frame(res_table, cpms)
write.table(res_table, file="res_name.txt", sep="\t", col.names = T, row.names = T, quote=F)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment