Skip to content

Instantly share code, notes, and snippets.

View lwaldron's full-sized avatar

Levi Waldron lwaldron

View GitHub Profile
@lwaldron
lwaldron / cBioPortal tests
Last active September 1, 2020 22:16
Download of full ACC and BRCA datasets, GBM IMPACT341
# Reproduce from the command line inside the stock Bioconductor Docker image
# by starting R with:
#   docker run -it bioconductor/bioconductor_docker:latest R
BiocManager::install("cBioPortalData")
library(cBioPortalData)

# Time the construction of the full acc_tcga data pack. The author's note
# puts this at roughly 10 seconds.
system.time(
  accpack <- cBioDataPack("acc_tcga")
)

# Auto-print the resulting object for inspection.
accpack
@lwaldron
lwaldron / acc_tcga IMPACT341
Created September 1, 2020 19:41
Quick test for download/construction time of acc_tcga IMPACT341 panel
# Start R inside the stock Bioconductor Docker image with:
#   docker run -it bioconductor/bioconductor_docker:latest R
BiocManager::install("cBioPortalData")
library(cBioPortalData)

# The object returned by cBioPortal() is handed to cBioPortalData() below.
cBio <- cBioPortal()

# Time the request for study "acc_tcga" restricted to gene panel "IMPACT341".
system.time(
  acc <- cBioPortalData(
    cBio,
    studyId = "acc_tcga",
    genePanelId = "IMPACT341"
  )
)

# Auto-print the result for inspection.
acc
@lwaldron
lwaldron / tcga_gbm_IMPACT341
Last active September 1, 2020 20:58
Quick test for download/construction time of gbm_tcga IMPACT341 panel
# Start R inside the stock Bioconductor Docker image with:
#   docker run -it bioconductor/bioconductor_docker:latest R
BiocManager::install("cBioPortalData")
library(cBioPortalData)

# The object returned by cBioPortal() is handed to cBioPortalData() below.
cBio <- cBioPortal()

# First timed request: study "gbm_tcga", gene panel "IMPACT341".
system.time(
  gbm <- cBioPortalData(
    cBio,
    studyId = "gbm_tcga",
    genePanelId = "IMPACT341"
  )
)
gbm

# Same request a second time, timed again
# (presumably to compare against the first run -- confirm intent).
system.time(
  gbm <- cBioPortalData(
    cBio,
    studyId = "gbm_tcga",
    genePanelId = "IMPACT341"
  )
)
# Load the bugSigSimple helper functions directly from GitHub, read the
# curation sheet, and export the distinct PMIDs it contains.
# NOTE(review): source() executes remote code from the master branch; pin a
# commit if reproducibility matters.
source("https://raw.githubusercontent.com/waldronlab/bugSigSimple/master/R/simple.R")
x <- readCurationSheet("https://github.com/waldronlab/bugSigSimple/blob/master/inst/extdata/Microbial%20signatures%20curation%20-%20signatures.tsv?raw=true")

# Number of distinct PMIDs in the sheet (auto-printed).
length(unique(x$PMID))

# Write one PMID per line. The file name is passed directly so that
# writeLines() creates and closes its own connection; wrapping it in file()
# would leave an unreferenced connection object behind after the call.
writeLines(unique(x$PMID), "signaturesPMID.txt")
@lwaldron
lwaldron / checkBiocInstallation
Last active January 24, 2019 11:29
Check which Bioconductor packages you can and can't install, with log
library(BiocManager)

# Working directory for package files, and removal of any results log left
# in the current directory.
dir.create("~/packagefiles")
unlink("installationresults.txt")

# Names of every package listed in the "BioCsoft" repository.
pkgs <- rownames(
  available.packages(
    contrib.url(BiocManager::repositories()["BioCsoft"])
  )
)

# Shuffle the package names; the fixed seed makes the order reproducible.
set.seed(1)
pkgs <- sample(pkgs)

# Optional (disabled): drop names that also appear in installed.packages().
## pkgs <- pkgs[!pkgs %in% installed.packages()]
@lwaldron
lwaldron / methbenchmark.R
Last active January 14, 2019 10:39
simple DelayedMatrix benchmark showing access time of n rows growing as O(n^3)
# Set-up for a DelayedMatrix access-time benchmark on TCGA methylation data.
#
# Install the GitHub build of curatedTCGAData when the package is either not
# installed or older than 1.5.6. utils::packageVersion() returns a version
# object, so the comparison is made component by component ("1.10.0" is
# correctly newer than "1.5.6"); comparing plain version strings would be
# lexicographic. system.file() returns "" for a package that is not
# installed, which lets the check run without loading the namespace.
if (!nzchar(system.file(package = "curatedTCGAData")) ||
    utils::packageVersion("curatedTCGAData") < "1.5.6") {
  BiocManager::install("waldronlab/curatedTCGAData")
}

# Abort early unless the Bioconductor version is at least 3.9.
stopifnot(BiocManager::version() >= "3.9")
library(curatedTCGAData) #requires >=1.5.6 and bioc-devel

# Download (or load from cache) the UCEC methyl27 methylation assay.
mae <- curatedTCGAData("UCEC", "Methylation_methyl27", dry.run = FALSE) #~2 seconds from cache

# First assay of the returned object; this is the matrix being benchmarked.
dm <- assay(mae, 1)
# benchmarking showing cubic increase with # rows
@lwaldron
lwaldron / GMQLusecase
Created December 19, 2018 20:14
GMQL use case (from Masseroli et al 2018, Bioinformatics bty688)
# Use case quoted from Masseroli et al 2018,
# https://doi.org/10.1093/bioinformatics/bty688:
# "In TCGA data of BRCA patients, find the DNA somatic mutations
# within the first 2000 bp outside of the genes that are both
# expressed with FPKM > 3 and have at least a methylation in the same patient
# biospecimen, and extract these mutations of the top 5% patients
# with the highest number of such mutations."
library(curatedTCGAData)

# Time the download of the mutation, RNA-seq and methylation assays.
# NOTE(review): the quoted use case refers to BRCA, but this call requests
# "ACC" -- confirm which cohort is intended.
system.time(
  mae <- curatedTCGAData(
    "ACC",
    c("Mutation", "RNASeq2GeneNorm", "Methylation"),
    dry.run = FALSE
  )
)
@lwaldron
lwaldron / REexample.R
Created July 11, 2018 11:37
Example of RaggedExperiment::qreduceAssay
## ------------------------------------------------------------------------
library(GenomicRanges)
library(RaggedExperiment)

# First sample: three named ranges (A, B, C) with scores 3, 4 and 5.
sample1 <- GRanges(
  c(
    A = "chr1:1-10:-",
    B = "chr1:8-14:+",
    C = "chr2:15-18:+"
  ),
  score = 3:5
)

# Second sample: two named ranges (D, E) with scores 1 and 2.
sample2 <- GRanges(
  c(
    D = "chr1:1-10:-",
    E = "chr2:11-18:+"
  ),
  score = 1:2
)

# Two-row table with a single "id" column
# (presumably one row per sample, for use as column data -- confirm).
colDat <- DataFrame(id = 1:2)
library(curatedTCGAData)

# Call with the function's default dry.run setting and auto-print the result
# (presumably a listing of the available OV resources -- confirm).
curatedTCGAData("OV")

# Fetch the three OV expression assays for real.
mae <- curatedTCGAData(
  "OV",
  assays = c("mRNAArray", "RNASeq2GeneNorm", "RNASeqGene"),
  dry.run = FALSE
)

library(TCGAutils)

# Column selector for sample code 11
# (presumably the TCGA "solid tissue normal" code -- confirm).
keep <- TCGAsampleSelect(colnames(mae), 11)

# Subset the columns with `keep`, then apply intersectColumns() followed by
# intersectRows() to the subset.
mae <- intersectRows(intersectColumns(mae[, keep, ]))

library(org.Hs.eg.db)
@lwaldron
lwaldron / symbolsToRanges.R
Last active June 22, 2018 13:31
Add ranges to RNA-seq and microarray SummarizedExperiments where rownames are gene symbols in a MultiAssayExperiment
# Append a single experiment to a MultiAssayExperiment-like object.
#
# mae:  object to extend; it is combined with the new element via c().
# x:    the experiment to add.
# name: name given to the new element (default "newelement").
#
# Returns the value of c(mae, <one-element ExperimentList>).
.cMAE <- function(mae, x, name="newelement"){
  # Build a one-element ExperimentList under a placeholder name, then rename
  # its first (only) element to the requested name.
  addition <- ExperimentList(tmp=x)
  names(addition)[1] <- name
  c(mae, addition)
}
# Heuristic check for whether the row names of x look like gene symbols.
#
# x: any object for which rownames() is defined.
#
# Returns TRUE when more than 90% of the entries in c(FALSE, <matches>) match
# the pattern, FALSE otherwise. The prepended FALSE keeps the mean defined
# when there are no row names and slightly lowers the proportion otherwise.
# Note that the pattern is anchored only at the start, so any row name that
# begins with an uppercase letter or a digit counts as a match.
.hasSymbols <- function(x){
  symbol_pattern <- "^[A-Z0-9]{1,6}|^C[0-9]orf[0-9]{1,4}"
  looks_like_symbol <- grepl(symbol_pattern, rownames(x))
  match_fraction <- mean(c(FALSE, looks_like_symbol), na.rm=TRUE)
  match_fraction > 0.9
}
.isSummarizedExperiment <- function(x){