Skip to content

Instantly share code, notes, and snippets.

@Dudemanguy
Created October 18, 2017 16:37
Show Gist options
  • Save Dudemanguy/e4da2676accecad955341377119c6cfc to your computer and use it in GitHub Desktop.
Save Dudemanguy/e4da2676accecad955341377119c6cfc to your computer and use it in GitHub Desktop.
Script for performing differential gene expression between two groups
library(HTSFilter)
library(edgeR)
suppressPackageStartupMessages(library(DESeq2))
options(max.width=9999999, width=10000)
#define functions
#user input
print("Script for performing differential gene expression between two groups")
count_raw <- readline("Enter the file containing the count matrix \n")
count_matrix <- read.table(count_raw)
sample_query <- readline("Press 1 to enter samples manually. Press 2 to read from a file (each entry on a separate line) \n")
#inputting sample groups; provide manual option and reading from a file
if (sample_query==1){
columns_to_keep <- readline("Enter the desired samples as numbers separated by a comma prefaced by a lowercase h (e.g. h001,h002,h003,etc.) \n")
sample_split <- strsplit(columns_to_keep, ",")
count_subset <- subset(count_matrix, select=eval(parse(text=sample_split)))
#ask for condition entry
condition_query2 <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n")
if (condition_query2==1){
define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n")
group_split <- strsplit(define_group, ",")
group <- unlist(group_split)
group <- factor(group, levels=unique(unlist(group_split)))
}
else if (condition_query2==2){
define_group <- readline("Enter the file name containing the condition \n")
group <- readLines(define_group)
group <- factor(group, levels=unique(readLines(define_group)))
}
}
#sample entry
if (sample_query==2){
sample_query2 <- readline("Press 1 if the file only contains the desired samples. Press 2 if the file also defines the condition (tab delimited) \n")
if (sample_query2==1){
print("File only contains sample information")
sample_file <- readline("Enter the file name \n")
columns_to_keep <- readLines(sample_file)
count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep))))
condition_query <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n")
#ask for condition entry
if (condition_query==1){
define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n")
group_split <- strsplit(define_group, ",")
group <- unlist(group_split)
group <- factor(group, levels=unique(unlist(group_split)))
}
else if (condition_query==2){
define_group <- readline("Enter the file name containing the condition \n")
group <- read.table(define_group)
group <- factor(group, levels=unique(read.table(define_group)))
}
}
else if (sample_query==2){
print("File contains sample and condition information")
complete_file <- readline("Enter the file name \n")
complete_table <- read.table(complete_file)
columns_to_keep <- as.vector(complete_table[,1])
count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep))))
group <- complete_table[,2]
group <- factor(group, levels=unique(complete_table[,2]))
}
}
#error handling
if (exists("count_subset")==FALSE){
print("Error. No count matrix found. Retry entry.")
stop()
}
#get the number of samples in the first group
group_table <- table(group)
n <- group_table[[1]][1]
#output directory for results
dir_output <- readline("Enter the name of the output directory \n")
dir.create(dir_output)
#apply HTSFilter
filter <- HTSFilter(count_subset, group, s.min=1, s.max=200, s.len=25)
count_filter <- filter$filteredData
#apply edgeR
y <- DGEList(counts=count_filter, group=group)
y <- calcNormFactors(y)
y <- estimateDisp(y)
et <- exactTest(y)
#apply DESeq2
group_dataframe <- data.frame(group)
dds <- DESeqDataSetFromMatrix(countData=count_filter, colData=group_dataframe, design=~group)
dds <- DESeq(dds)
res <- results(dds)
#create data frame for edgeR results
et_results <- topTags(et, n=Inf, sort.by="none")
write.table(et_results, file="et_output", sep="\t")
et_table <- read.table("et_output")
et_table <- et_table[-(2)]
file.remove("et_output")
#create data frame for DESeq2 results
resOrdered <- res[order(res$padj),]
write.table(resOrdered, file="DESeq_output", sep="\t")
res_order <- read.table("DESeq_output")
file.remove("DESeq_output")
#grab counts, average them, add to data frame, and sort
a <- count_filter[,1:n]
b <- count_filter[,(n+1):ncol(count_filter)]
c <- data.frame(rowMeans(a))
d <- data.frame(rowMeans(b))
et_table["Avg Ct A"] <- c
et_table["Avg Ct B"] <- d
et_table <- et_table[,c(4,5,1,2,3)]
et_table <- et_table[order(et_table$FDR, decreasing=FALSE),]
res_order["Avg Ct A"] <- c
res_order["Avg Ct B"] <- d
res_order <- res_order[,c(7,8,1,2,3,4,5,6)]
#print results and save to file
setwd(dir_output)
dir.create("edgeR")
setwd("edgeR")
sink("dgelist_output")
print(y)
sink()
write.table(et_table, file="full_results", sep="\t", quote=FALSE)
et_sig <- et_table[which(et_table$FDR<0.05),]
write.table(et_sig, file="significant_results", sep="\t", quote=FALSE)
setwd("..")
dir.create("DESeq2")
setwd("DESeq2")
sink("deseq2_summary")
print(summary(res))
sink()
write.table(res_order, file="full_results", sep="\t", quote=FALSE)
res_sig <- res_order[which(res_order$padj<0.05),]
write.table(res_sig, file="significant_results", sep="\t", quote=FALSE)
setwd("..")
setwd("..")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment