Skip to content

Instantly share code, notes, and snippets.

@mtmorgan
Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mtmorgan/0093d0818f0c91eae0a2 to your computer and use it in GitHub Desktop.
Save mtmorgan/0093d0818f0c91eae0a2 to your computer and use it in GitHub Desktop.
## Imports: GEOquery, Biobase
# GEO series accession; it is also used as the name of the local directory
# that the script changes into.
acc <- "GSE62944"

# Fetch the supplementary files only when nothing named after the accession
# exists in the current directory yet.
if (!file.exists(acc)) {
  GEOquery::getGEOSuppFiles(acc)
}

# The file names used further down are relative, so the rest of the script
# runs from inside the accession directory.
setwd(acc)
# Assemble the per-sample clinical annotation table.
#
# The clinical-variables file is read as one flat vector of tab-separated
# fields and folded into a matrix with `n_fields` rows, so every consecutive
# run of `n_fields` fields in the file becomes one matrix column. The first
# row and first column of that matrix hold the labels and are moved into the
# dimnames. Each sample's cancer type is then looked up in a second file and
# appended as the `CancerType` column.
#
# Both files are addressed relative to the current working directory.
# Evaluates to a data.frame with one row per sample.
clinvar <- local({
  message("clinvar")

  # Number of tab-separated fields per record; presumably one label field
  # followed by the 7706 samples mentioned in the file name -- verify if the
  # GEO supplementary file is ever re-issued.
  n_fields <- 7707L

  fl <- "GSE62944_TCGA_20_420_Clinical_Variables_7706_Samples.txt.gz"
  fields <- scan(fl, what = character(), sep = "\t", quote = "")

  # matrix() only warns (and recycles values) when the data do not fill the
  # requested shape, which would silently misalign a truncated or ragged
  # file; fail loudly instead.
  if (length(fields) == 0L || length(fields) %% n_fields != 0L) {
    stop(
      sprintf(
        "'%s': read %d fields, expected a non-zero multiple of %d",
        fl, length(fields), n_fields
      ),
      call. = FALSE
    )
  }

  m <- matrix(fields, nrow = n_fields)
  dimnames(m) <- list(m[, 1], m[1, ])
  # drop = FALSE keeps the matrix shape even if a single column remains.
  df <- as.data.frame(m[-1, -1, drop = FALSE])

  fl <- "GSE62944_TCGA_20_CancerType_Samples.txt.gz"
  ct <- read.delim(
    fl,
    header = FALSE,
    colClasses = c("character", "factor"),
    col.names = c("sample", "type")
  )

  # Every sample in the clinical table must have a cancer-type entry.
  idx <- match(rownames(df), ct$sample)
  stopifnot(!anyNA(idx))
  df$CancerType <- ct$type[idx]

  df
})
# Assemble the integer count matrix.
#
# The feature-counts file is extracted from the RAW tar archive when it is
# not already present, read as one flat vector of tab-separated fields, and
# folded into a matrix with `n_fields` rows, so every consecutive run of
# `n_fields` fields in the file becomes one matrix column. After the label
# row and column are moved into the dimnames the matrix is transposed: rows
# of the result are named by the first field of each record, columns by the
# fields of the first record.
#
# All paths are relative to the current working directory.
# Evaluates to an integer matrix.
counts <- local({
  message("counts")

  # Number of tab-separated fields per record; presumably one label field
  # followed by 7706 samples -- verify if the GEO file is ever re-issued.
  n_fields <- 7707L

  fl <- "GSM1536837_TCGA_20.Illumina.tumor_Rsubread_FeatureCounts.txt.gz"
  if (!file.exists(fl)) {
    untar("GSE62944_RAW.tar", fl)
  }

  fields <- scan(fl, what = character(), sep = "\t", quote = "")

  # matrix() only warns (and recycles values) when the data do not fill the
  # requested shape, which would silently misalign a truncated or ragged
  # file; fail loudly instead.
  if (length(fields) == 0L || length(fields) %% n_fields != 0L) {
    stop(
      sprintf(
        "'%s': read %d fields, expected a non-zero multiple of %d",
        fl, length(fields), n_fields
      ),
      call. = FALSE
    )
  }

  m <- matrix(fields, nrow = n_fields)
  dimnames(m) <- list(m[, 1], m[1, ])

  # Strip the label row/column, then flip the orientation.
  # drop = FALSE keeps the matrix shape even if a single column remains.
  m <- t(m[-1, -1, drop = FALSE])

  # The values were read as text; convert them in place, keeping dim/dimnames.
  mode(m) <- "integer"
  m
})
# Wrap the clinical table as an AnnotatedDataFrame and combine it with the
# count matrix into a single ExpressionSet.
adf <- Biobase::AnnotatedDataFrame(data = clinvar)
eset <- Biobase::ExpressionSet(assayData = counts, phenoData = adf)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment