Skip to content

Instantly share code, notes, and snippets.

@brian-bot
Last active August 29, 2015 14:08
Show Gist options
  • Save brian-bot/3f2b74694f58404c93fd to your computer and use it in GitHub Desktop.
Save brian-bot/3f2b74694f58404c93fd to your computer and use it in GitHub Desktop.
runAttractorInCRCSC <- function(projectId){
require(synapseClient)
thisFile <- "https://gist.github.com/brian-bot/3f2b74694f58404c93fd"
## PULL FROM THE LINK THAT WAS CREATED IN THE PROJECT AT AN EARLIER STAGE
findLink <- synapseQuery(paste('SELECT id, name FROM link WHERE benefactorId=="', projectId, '"'))
resolveLink <- synGet(findLink$link.id[findLink$link.name=="COAD.attractors.rnaseq.txt"][1])
## GET THE ATTRACTOR METAGENES FOR TCGA COAD FROM THE LINK
message("... reading in attractor metagenes ...")
synAmgCoad <- synGet(unlist(resolveLink$properties$linksTo))
amgCoad <- read.delim(getFileLocation(synAmgCoad), as.is=T)
attractorGenes <- sapply(strsplit(amgCoad$Attractor001.Genes, "|", fixed=T), "[[", 1)
## GRAB THE CURATED RNA-SEQ EXPRESSION DATA ACROSS TCGA COAD AND READ
message("... downloading expression matrix from CRCSC ...")
crcExpr <- synGet("syn2326100")
message("... reading in large expression matrix from CRCSC (this might take a minute) ...")
crc <- read.delim(getFileLocation(crcExpr), as.is=T, row.names=1, check.names=FALSE)
crc <- crc[apply(crc, 1, sd) !=0, ]
## SUBSET TO ONLY ATTRACTORS PULLED FROM COAD
message("... computing the top attractor in CRCSC ...")
crcAm <- colMeans(crc[attractorGenes, ])
## LOOK AT RANK-BASED CORRELATION (SPEARMAN)
message("... calculating spearman correlation with all genes in CRCSC dataset ...")
amCorr <- apply(crc, 1, function(x){
cor(x, crcAm, method = "spearman")
})
amCorr <- data.frame(gene=rownames(crc), spearmanRho=amCorr, stringsAsFactors = FALSE)
amCorr <- amCorr[order(amCorr$spearmanRho), ]
## GRAB GENE WITH THE MOST NEGATIVE CORRELATION
thisGene <- amCorr$gene[which(amCorr$spearmanRho==min(amCorr$spearmanRho, na.rm = TRUE))]
## WRITE OUT TO FILE TO STORE IN SYNAPSE
fileName <- file.path(tempdir(), "coadAttractorMetagene-correlationCRCSC.tsv")
write.table(amCorr, file=fileName, quote=FALSE, sep="\t", row.names=FALSE)
## FIND FOLDER TO STORE RESULTS
fq <- synapseQuery(paste('SELECT id, name FROM folder WHERE benefactorId=="', projectId, '"'))
## CREATE PROVENANCE: DESCRIBE WHAT WAS USED TO GENERATE THE RESULT
message("... recording provenance for this analysis ...")
act <- synStore(Activity(name="COAD Attractor Metagene correlation with CRCSC TCGA",
used=list(list(entity=synAmgCoad, wasExecuted=F),
list(entity=crcExpr, wasExecuted=F),
list(url=thisFile, name="runAttractorInCRCSC.R", wasExecuted=T))))
## SET ANNOTATIONS AND STORE RESULT BACK IN SYNAPSE
message("... storing correlation analysis back in Synapse ...")
synFile <- File(path=fileName, parentId=fq$folder.id[ fq$folder.name=="results" ], min=thisGene)
annotValue(synFile, "projectId") <- projectId
synFile <- synStore(synFile, activity=act)
synUrl <- paste("https://synapse.org/#!Synapse:", synFile@properties$id, sep="")
## SHOW THE NEWLY CREATED RESULT
message("... FINISHED ...")
message(paste("... go here to see your result:", synUrl, "..."))
return(invisible(synFile))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment