Qing Zhang (hurrialice)
@hurrialice
hurrialice / split_intevals.py
Last active September 19, 2020 06:11
split intervals into N equally spaced chunks
import pandas as pd
import subprocess
import numpy as np
import sys

def interval_divider(bai_path, bam_path=None, N=123):
    # get reference lengths and formal contig names from the indexed BAM
    x = subprocess.check_output("samtools idxstats " + bai_path[:-4], shell=True)
    ref_df = pd.DataFrame(
        [a.split('\t') for a in x.decode().rstrip().split("\n")]
    ).rename(columns={0: "chr", 1: "length"})
@hurrialice
hurrialice / batch_igv.py
Last active March 30, 2020 14:02
Run IGV over a list of locations, with BAMs served through the GDC API
import igv_remote as ir
import keyboard
import numpy as np
import pandas as pd

cohort = "BRCA"
print("cohort: {}".format(cohort))

# build the localhost URL that serves a GDC BAM for a given UUID
def format(uuid):
    return "http://localhost:5000/" + uuid + ".bam"
@hurrialice
hurrialice / record_logs.R
Created November 10, 2019 03:55
allow better formatted display of messages; store all conditions in a list for programmatic access
#' better display and logging backend
#'
#' @param expr expression to evaluate
#' @param tag_width width of the display box for the message tag
#' @param path where the messages go; defaults to `stdout()`
#' @param save_conditions if `TRUE`, all conditions will be saved to a list
#' @param quiet if `TRUE`, all messages (errors excluded) will be suppressed
#' @examples
#' test_function <- function(){
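The preview cuts off inside the example; the usual base-R technique behind this kind of logging backend is withCallingHandlers(), which intercepts messages and warnings without aborting evaluation. The sketch below only illustrates that technique with an assumed helper name (with_logging); it is not the gist's implementation.

# Minimal sketch of condition capture (assumed helper, not the gist's code)
with_logging <- function(expr, quiet = FALSE) {
  conditions <- list()
  result <- withCallingHandlers(
    expr,
    message = function(m) {
      conditions[[length(conditions) + 1]] <<- m
      if (quiet) invokeRestart("muffleMessage")
    },
    warning = function(w) {
      conditions[[length(conditions) + 1]] <<- w
      if (quiet) invokeRestart("muffleWarning")
    }
  )
  list(result = result, conditions = conditions)
}

# usage: run an expression, then inspect the captured conditions programmatically
out <- with_logging({ message("step 1 done"); warning("be careful"); 42 })
length(out$conditions)  # 2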
@hurrialice
hurrialice / tests.R
Last active April 5, 2022 05:01
behaviour of LR, Wald, and score tests on binomial data
library(dplyr)

# generate a raw dataframe for all chosen N, p, p1, p2
df_root = expand.grid(N = c(20, 50, 100),
                      p = seq(from = 0.1, to = 0.9, by = 0.1),
                      p1 = seq(from = 0.1, to = 0.9, by = 0.1),
                      p2 = seq(from = 0.1, to = 0.9, by = 0.1))

# number of replicates for each parameter combination
nsim <- 200
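The preview stops at the setup, so for reference the three statistics being compared can be written down directly for a single binomial proportion under H0: p = p0. This is a sketch of the textbook formulas, not necessarily how tests.R computes them.

# Sketch of the three classical tests for H0: p = p0, with x successes out of N
# (standard formulas; not necessarily the gist's implementation)
binom_tests <- function(x, N, p0) {
  phat <- x / N
  # Wald: variance evaluated at the MLE
  wald  <- (phat - p0)^2 / (phat * (1 - phat) / N)
  # Score: variance evaluated at the null value
  score <- (phat - p0)^2 / (p0 * (1 - p0) / N)
  # Likelihood ratio: twice the log-likelihood difference
  lr <- 2 * (dbinom(x, N, phat, log = TRUE) - dbinom(x, N, p0, log = TRUE))
  c(wald = wald, score = score, LR = lr)
}

# all three are asymptotically chi-squared with 1 df under H0
binom_tests(x = 12, N = 50, p0 = 0.1)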
library(readr)
library(dplyr)

# column names for the RMBase RBP "reader" table
coln <- c("chromosome", "modStart", "modEnd", "modId", "score", "strand", "modName", "modType",
          "supportNum", "supportList", "pubmedIds", "geneName", "geneType", "region", "sequence",
          "RBPname", "clipExpNum", "clipExplist", "RBPtype")
reader_raw <- read_tsv("RMBase_hg19_all_mod_RBP_reader.txt", comment = "#", skip = 1, col_names = coln, na = "")
# loop over different ranks
rank_universe <- 2:8
coph <- vector("list", length(rank_universe))  # one result slot per rank
cls <- vector("list", length(rank_universe))   # one result slot per rank
# the shortened version, keeping only sites present in reader_sites$modId
sm_short <- filter_site_and_genes(sm, reader_sites$modId)
# set the parallel params
.assign_metagene <- function(m){

library(methods)
library(readr)
library(Matrix)
library(foreach)
library(iterators)
library(doParallel)
library(dplyr)

# this is a universal script for doParallel;
# never forget to stop the clusters at the end of the main script.
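That reminder is worth spelling out: a generic doParallel skeleton (a sketch, not the gist itself) registers a cluster, runs the foreach loop, and always stops the cluster afterwards.

# Generic doParallel skeleton (illustrative sketch, not the gist's code)
library(foreach)
library(doParallel)

no_cores <- 4                      # assumed core count, for illustration only
cl <- makeCluster(no_cores)        # spawn the worker processes
registerDoParallel(cl)             # make them the foreach backend

res <- foreach(i = 1:10, .combine = c) %dopar% {
  i^2                              # replace with the real per-task work
}

stopCluster(cl)                    # never forget this at the end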
@hurrialice
hurrialice / prince_run.R
Created March 13, 2018 13:19
R crash again
## ---------- params set -------------
# number of cores to use with foreach
no_cores <- 10
# relative importance (higher = more weight on prior info)
alpha = 0.5

## ----------- load dataset --------------
# load dataset and assign variables
W <- readRDS("string_ppi.rds")
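prince_run.R appears to set up a PRINCE-style network propagation over the STRING PPI matrix W. One update step consistent with the comment above (higher alpha means more weight on the prior) might look like the sketch below, where Y (prior scores), Fcur (current scores), and Wn (normalized network) are assumed names, not variables from the gist.

# Illustrative sketch of one propagation step (assumed names, not the gist's code)
library(Matrix)

# symmetric degree normalization: D^{-1/2} W D^{-1/2}
normalize_adj <- function(W) {
  d <- rowSums(W)
  Dm <- Diagonal(x = 1 / sqrt(pmax(d, .Machine$double.eps)))
  Dm %*% W %*% Dm
}

# one update step; Y = prior scores, Fcur = current scores, Wn = normalize_adj(W)
propagate_step <- function(Fcur, Y, Wn, alpha = 0.5) {
  # higher alpha puts more weight on the prior information, per the comment above
  as.vector((1 - alpha) * (Wn %*% Fcur) + alpha * Y)
}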