Skip to content

Instantly share code, notes, and snippets.

@jnpaulson
Created February 1, 2021 06:57
Show Gist options
  • Save jnpaulson/a5b181cc49a521913d2d78682fb45d3d to your computer and use it in GitHub Desktop.
Save jnpaulson/a5b181cc49a521913d2d78682fb45d3d to your computer and use it in GitHub Desktop.
GTEx download of 6p and DE for lung + whole blood
# Actually perform the differential expression analysis
library(yarn)
library(dplyr)
library(readr)
library(biomaRt)
library(limma)
source("voomweights.R")
obj <- readRDS("~/Desktop/gtex_v6p_lung_blood_norm.rds")
gender = pData(obj)$GENDER
batch = factor(as.character(pData(obj)$SMNABTCHT))
tissue <- factor(pData(obj)$SMTSD,levels=c("Whole Blood","Lung"))
design = model.matrix(~tissue+batch+gender)
transformedCounts = assayData(obj)[["normalizedMatrix"]]
voomOutput = voomWeightsCustomized(transformedCounts,design)
fit = lmFit(voomOutput,design)
fit = eBayes(fit)
gl = fData(obj)$hgnc_symbol
tt = topTable(fit,number=Inf,genelist=gl,coef="tissueLung")
saveRDS(list(fit,tt),file="~/Desktop/results.rdata")
# Download and prepare the datafile
library(yarn)
library(dplyr)
library(readr)
library(biomaRt)
library(limma)
cnts <- suppressWarnings(read_delim("https://storage.googleapis.com/gtex_analysis_v6p/rna_seq_data/GTEx_Analysis_v6p_RNA-seq_RNA-SeQCv1.1.8_gene_reads.gct.gz",delim='\t',skip=2))
genes <- unlist(cnts[, 1])
geneNames <- unlist(cnts[, 2])
counts <- cnts[, -c(1:2)]
counts <- as.matrix(counts)
rownames(counts) <- genes
throwAway <- which(rowSums(counts) == 0)
counts <- counts[-throwAway, ]
es <- ExpressionSet(as.matrix(counts))
phenoFile <- "https://storage.googleapis.com/gtex_analysis_v6p/annotations/GTEx_Data_V6_Annotations_SampleAttributesDS.txt"
pd <- suppressWarnings(read_tsv(phenoFile)) %>% as.data.frame
rownames(pd) <- pd[, "SAMPID"]
ids <- sapply(strsplit(pd[, "SAMPID"], "-"), function(i) paste(i[1:2],
collapse = "-"))
pd <- cbind(pd,SUBJID = ids)
pheno2File<- "https://storage.googleapis.com/gtex_analysis_v6p/annotations/GTEx_Data_V6_Annotations_SubjectPhenotypesDS.txt"
pd2 <- suppressWarnings(read_tsv(pheno2File)) %>% as.data.frame
pdfinal <- left_join(pd,pd2)
pdfinal <- pdfinal[match(colnames(counts),pdfinal[["SAMPID"]]),]
phenoData(es) <- AnnotatedDataFrame(pdfinal)
genes <- sub("\\..*", "", rownames(counts))
ensembl = useEnsembl(biomart="ensembl",GRCh=37, dataset="hsapiens_gene_ensembl")
attributes <- c("ensembl_gene_id", "hgnc_symbol", "chromosome_name",
"start_position", "end_position", "gene_biotype")
esFinal <- annotateFromBiomart(obj = es, genes = genes,attributes=attributes)
saveRDS(esFinal, file = "~/Desktop/gtex_v6p.rds")
# Filter to solely whole blood and lung samples
# We checked and this is the 'only' USEFRZ cohort
library(yarn)
library(dplyr)
library(readr)
library(biomaRt)
library(limma)
obj <- readRDS("~/Desktop/gtex_v6p.rds")
obj <- filterSamples(esFinal,c("Whole Blood","Lung"),
groups='SMTSD',keepOnly=TRUE)
obj = filterLowGenes(obj,"SMTSD")
obj <- filterGenes(obj)
### Normalize using qsmooth
obj = normalizeTissueAware(obj,"SMTSD")
saveRDS(obj,file="~/Desktop/gtex_v6p_lung_blood_norm.rds")
# customized voom weights for already transformed values
voomWeightsCustomized <- function(log2counts,design,lib.size = NULL,is.cpm = FALSE) {
if (is.null(design)) {
design <- model.matrix(~ 1 )
}
out<-list()
if (is.cpm == TRUE) {
fit <- lmFit(log2counts, design)
xx <- fit$Amean + mean(log2(lib.size + 1)) - log2(1e+06)
yy <- sqrt(fit$sigma)
l <- lowess(xx, yy, f = .5)
f <- approxfun(l, rule = 2)
fitted.values <- fit$coef %*% t(fit$design)
fitted.cpm <- 2^fitted.values
fitted.count <- 1e-06 * t(t(fitted.cpm) * (lib.size + 1))
fitted.logcount <- log2(fitted.count)
w <- 1/f(fitted.logcount)^4
dim(w) <- dim(fitted.logcount)
}
if (is.cpm == FALSE) {
fit <- lmFit(log2counts, design)
xx <- fit$Amean
yy <- sqrt(fit$sigma)
l <- lowess(xx, yy, f = .5)
f <- approxfun(l, rule = 2)
fitted.values <- fit$coef %*% t(fit$design)
fitted.logcount <- log2(fitted.values)
w <- 1/f(fitted.logcount)^4
dim(w) <- dim(fitted.logcount)
}
out$E = log2counts
out$weights = w
out$design = design
new("EList",out)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment