Created
August 21, 2017 14:44
-
-
Save memoiry/30715257430b3896507996c53532fe5c to your computer and use it in GitHub Desktop.
runMashup and mashup_runCV_featureSet functions added to the netDx package.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Run Mashup cross-validation with the provided networks.
#'
#' @details Optionally writes the cross-validation query files into
#' \code{queries_dir}, runs Mashup network selection over that directory via
#' \code{runMashup(..., ranking = FALSE)}, and reads back the resulting
#' \code{top_networks.txt} tally.
#' @param Mashup_db (char) path to Mashup generic database with
#' training population.
#' @param queries_dir (char) directory where the query files are stored.
#' Created (recursively) if it does not exist.
#' @param trainID_pred (char) training samples' index.
#' @param true_pheno (data.frame) pheno information; \code{runMashup()} reads
#' its \code{ID} column. The number of rows is used as the number of training
#' samples written to each query file.
#' @param incNets (char) vector of networks to include in this analysis
#' (features/pathway names). Useful for subset-based feature selection.
#' @param smooth (logical) perform smooth in the network or not.
#' @param cut_off (integer) cutoff to eliminate redundant network through
#' network tally.
#' @param orgName (char) organism name for Mashup generic database.
#' The default value will likely never need to be changed.
#' @param write_query (logical) write the query files here (\code{TRUE}) or
#' assume they already exist in \code{queries_dir} (\code{FALSE}).
#' @param fileSfx (char) prefix of the generated query file names
#' (\code{<fileSfx>_<i>.query}).
#' @param verbose (logical) print messages.
#' @param numCores (integer) num parallel threads for cross-validation.
#' Currently not used by this function.
#' @param seed_CVqueries (integer) RNG seed for inner cross validation loop.
#' Makes deterministic samples held-out for each mashup query (see
#' makeCVqueries())
#' @param ... args for \code{makeCVqueries()}
#' @return (list) with \code{top_net}, the path to the
#' \code{top_networks.txt} file that was read, and \code{tally}, the first
#' column of that file with the first \code{"_cont"} removed from each entry.
#' @examples
#' \dontrun{
#' mashup_runCV_featureSet(Mashup_db, queries_dir, trainID_pred, true_pheno,
#'   incNets = "all", smooth = TRUE, cut_off = 9, orgName = "predictor",
#'   write_query = TRUE, fileSfx = "CV", verbose = FALSE, numCores = 2L,
#'   seed_CVqueries = 42L)
#' }
#' @export
mashup_runCV_featureSet <- function(Mashup_db, queries_dir, trainID_pred,
                                    true_pheno, incNets = "all",
                                    smooth = TRUE, cut_off = 9,
                                    orgName = "predictor",
                                    write_query = TRUE, fileSfx = "CV",
                                    verbose = FALSE, numCores = 2L,
                                    seed_CVqueries = 42L, ...) {
  # true_pheno is handled as a data.frame downstream (runMashup reads
  # true_pheno$ID), so length() would count columns; count samples (rows).
  num_train_samps <- if (is.data.frame(true_pheno)) {
    nrow(true_pheno)
  } else {
    length(true_pheno)
  }

  if (!dir.exists(queries_dir)) {
    dir.create(queries_dir, recursive = TRUE)
  }

  if (write_query) {
    if (verbose) {
      cat("\tWriting GM queries: ")
    }
    qSamps <- makeCVqueries(trainID_pred, verbose = verbose,
                            setSeed = seed_CVqueries, ...)
    # seq_along() so an empty query list writes nothing instead of
    # iterating over c(1, 0).
    for (m in seq_along(qSamps)) {
      if (verbose) {
        cat(sprintf("%i ", m))
      }
      qFile <- sprintf("%s/%s_%i.query", queries_dir, fileSfx, m)
      GM_writeQueryFile(qSamps[[m]], incNets, num_train_samps,
                        qFile, orgName)
    }
  }

  runMashup(Mashup_db, queries_dir, true_pheno, trainID_pred = trainID_pred,
            smooth = smooth, ranking = FALSE, cut_off = cut_off,
            verbose = verbose)

  # The result sub-directory name depends on whether smoothing was requested.
  res_subdir <- if (smooth) "smooth_result" else "no_smooth_result"
  top_net <- sprintf("%s/%s/top_networks.txt", queries_dir, res_subdir)
  if (!file.exists(top_net)) {
    stop("Mashup did not produce the expected result file: ", top_net,
         call. = FALSE)
  }

  mashupTally <- read.delim(top_net, header = FALSE)
  mashupTally <- sub("_cont", "", mashupTally[[1]])
  list(top_net = top_net, tally = mashupTally)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Run a Mashup feature selection or patient ranking query.
#'
#' @details Writes \code{ids.txt} next to \code{queries} (in
#' \code{dirname(queries)}), and for network selection also
#' \code{labels.txt} inside \code{queries}, then invokes the
#' \code{julia/mashup.jl} script shipped in the netDx package directory
#' through \code{julia}.
#' @param Mashup_db (char) path to directory with Mashup generic database
#' @param queries (char) for patient ranking, path to a single query file;
#' for network selection, path to a directory of query files.
#' @param true_pheno (data.frame) pheno information. Must contain an
#' \code{ID} column.
#' @param trainID_pred (char) training samples' index. Required when
#' \code{ranking = FALSE}: samples whose \code{ID} is in this vector are
#' labelled 1 and all others -1 in the labels file.
#' @param smooth (logical) perform smooth in the network or not.
#' @param verbose (logical) print messages
#' @param ranking (logical) rank patients (\code{TRUE}) or run network
#' selection (\code{FALSE}).
#' @param top_net (char) a file that stores the selected top networks for
#' the type of interest. Only used for patient ranking.
#' @param cut_off (integer) cutoff to eliminate redundant network through
#' network tally. Only used for network selection.
#' @return if used for patient ranking, the path to the Mashup PRANK file;
#' for network selection, \code{NULL} invisibly.
#' @examples
#' \dontrun{
#' runMashup(Mashup_db, queries, true_pheno, trainID_pred = NULL,
#'   smooth = TRUE, verbose = TRUE, ranking = TRUE, top_net = NULL,
#'   cut_off = 9)
#' }
#' @export
runMashup <- function(Mashup_db, queries, true_pheno, trainID_pred = NULL,
                      smooth = TRUE, verbose = TRUE, ranking = TRUE,
                      top_net = NULL, cut_off = 9) {
  # Validate up front so that a call which cannot succeed fails before any
  # file is written or any external process is started.
  stopifnot("ID" %in% names(true_pheno))
  if (ranking) {
    # Ranking takes a single flat query file, not a directory of queries.
    stopifnot(!dir.exists(queries))
  } else {
    stopifnot(dir.exists(queries))
    if (is.null(trainID_pred)) {
      stop("trainID_pred is required for network selection ",
           "(ranking = FALSE): it defines the labels file passed to Mashup.",
           call. = FALSE)
    }
  }

  # The labels file is only consumed by the selection command and lives
  # inside the queries directory, so it is only written in that mode.
  labels_file <- NULL
  if (!ranking) {
    # Assign the whole column at once: works whether or not STATUS already
    # exists and regardless of its previous type.
    true_pheno$STATUS <- ifelse(true_pheno$ID %in% trainID_pred, 1, -1)
    labels_file <- file.path(queries, "labels.txt")
    if (verbose) {
      cat(labels_file, "\n", sep = "")
    }
    write.table(true_pheno[c("ID", "STATUS")], file = labels_file,
                col.names = FALSE, row.names = FALSE, quote = FALSE)
  }

  id <- file.path(dirname(queries), "ids.txt")
  if (verbose) {
    cat(id, "\n", sep = "")
  }
  write.table(true_pheno["ID"], file = id,
              col.names = FALSE, row.names = FALSE, quote = FALSE)

  smooth_str <- if (smooth) "true" else "false"
  mashup_julia <- file.path(path.package("netDx"), "julia", "mashup.jl")

  # Quote every path so that spaces or shell metacharacters do not split the
  # argument; expand "~" first because the shell will not expand it once the
  # path is quoted.
  quote_path <- function(p) shQuote(path.expand(p))

  if (ranking) {
    cmd_args <- c(
      "ranking",
      if (!is.null(top_net)) c("--top_net", quote_path(top_net)),
      "--net", quote_path(Mashup_db),
      "--id", quote_path(id),
      "--CV_query", quote_path(queries),
      "--smooth", smooth_str,
      "--res_dir", quote_path(dirname(queries))
    )
  } else {
    cmd_args <- c(
      "selection",
      "--net", quote_path(Mashup_db),
      "--id", quote_path(id),
      "--labels", quote_path(labels_file),
      "--CV_query", quote_path(queries),
      "--smooth", smooth_str,
      "--cut_off", sprintf("%d", cut_off),
      "--res_dir", quote_path(queries)
    )
  }
  cmd <- paste(c("julia", quote_path(mashup_julia), cmd_args), collapse = " ")

  cat("\nRunning command:\n", cmd, "\n", sep = "")
  status <- system(cmd, wait = TRUE,
                   ignore.stdout = !verbose, ignore.stderr = !verbose)
  if (status != 0L) {
    warning("Mashup command exited with status ", status,
            "; results may be missing or incomplete.", call. = FALSE)
  }

  if (!ranking) {
    return(invisible(NULL))
  }
  sprintf("%s_%s_mashup_PRANK.txt", queries,
          if (smooth) "smooth" else "no_smooth")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment