Qing Zhang (hurrialice)
@hurrialice
hurrialice / split_intevals.py
Last active September 19, 2020 06:11
split intervals into N equally spaced chunks
import pandas as pd
import subprocess
import numpy as np
import sys

def interval_divider(bai_path, bam_path=None, N=123):
    # get reference lengths and formal contig names from the indexed BAM
    x = subprocess.check_output("samtools idxstats " + bai_path[:-4], shell=True)
    ref_df = pd.DataFrame(
        [a.split('\t') for a in x.decode().rstrip().split("\n")]
    ).rename(columns={0: "chr", 1: "length"})
@hurrialice
hurrialice / batch_igv.py
Last active March 30, 2020 14:02
Run IGV over a list of locations, with BAMs served through the GDC API
import igv_remote as ir
import keyboard
import numpy as np
import pandas as pd

cohort = "BRCA"
print("cohort: {}".format(cohort))

# build the localhost URL that serves a GDC BAM for a given UUID
def format(uuid):
    return "http://localhost:5000/" + uuid + ".bam"
@hurrialice
hurrialice / record_logs.R
Created November 10, 2019 03:55
allow better formatted display of messages; store all conditions in a list for programmatic access
#' better display and logging backend
#'
#' @param expr expression to evaluate
#' @param tag_width width of the display box for the message tag
#' @param path where the messages go; defaults to `stdout()`
#' @param save_conditions if `TRUE`, all conditions will be saved to a list
#' @param quiet if `TRUE`, all messages (errors excluded) will be suppressed
#' @examples
#' test_function <- function(){
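The preview cuts off inside the example; the usual base-R technique behind this kind of logging backend is withCallingHandlers(), which intercepts messages and warnings without aborting evaluation. The sketch below only illustrates that technique with an assumed helper name (with_logging); it is not the gist's implementation.

# Minimal sketch of condition capture (assumed helper, not the gist's code)
with_logging <- function(expr, quiet = FALSE) {
  conditions <- list()
  result <- withCallingHandlers(
    expr,
    message = function(m) {
      conditions[[length(conditions) + 1]] <<- m
      if (quiet) invokeRestart("muffleMessage")
    },
    warning = function(w) {
      conditions[[length(conditions) + 1]] <<- w
      if (quiet) invokeRestart("muffleWarning")
    }
  )
  list(result = result, conditions = conditions)
}

# usage: run an expression, then inspect the captured conditions programmatically
out <- with_logging({ message("step 1 done"); warning("be careful"); 42 })
length(out$conditions)  # 2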
@hurrialice
hurrialice / tests.R
Last active April 5, 2022 05:01
behaviour of LR, Wald, and score tests on binomial data
library(dplyr)

# generate a raw dataframe for all chosen N, p, p1, p2
df_root = expand.grid(N = c(20, 50, 100),
                      p = seq(from = 0.1, to = 0.9, by = 0.1),
                      p1 = seq(from = 0.1, to = 0.9, by = 0.1),
                      p2 = seq(from = 0.1, to = 0.9, by = 0.1))

# number of replicates for each parameter combination
nsim <- 200
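The preview stops at the setup, so for reference the three statistics being compared can be written down directly for a single binomial proportion under H0: p = p0. This is a sketch of the textbook formulas, not necessarily how tests.R computes them.

# Sketch of the three classical tests for H0: p = p0, with x successes out of N
# (standard formulas; not necessarily the gist's implementation)
binom_tests <- function(x, N, p0) {
  phat <- x / N
  # Wald: variance evaluated at the MLE
  wald  <- (phat - p0)^2 / (phat * (1 - phat) / N)
  # Score: variance evaluated at the null value
  score <- (phat - p0)^2 / (p0 * (1 - p0) / N)
  # Likelihood ratio: twice the log-likelihood difference
  lr <- 2 * (dbinom(x, N, phat, log = TRUE) - dbinom(x, N, p0, log = TRUE))
  c(wald = wald, score = score, LR = lr)
}

# all three are asymptotically chi-squared with 1 df under H0
binom_tests(x = 12, N = 50, p0 = 0.1)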
library(readr)
library(dplyr)

# column names for the RMBase RBP "reader" table
coln <- c("chromosome", "modStart", "modEnd", "modId", "score", "strand", "modName", "modType",
          "supportNum", "supportList", "pubmedIds", "geneName", "geneType", "region", "sequence",
          "RBPname", "clipExpNum", "clipExplist", "RBPtype")
reader_raw <- read_tsv("RMBase_hg19_all_mod_RBP_reader.txt", comment = "#", skip = 1, col_names = coln, na = "")
# loop over different ranks
rank_universe <- 2:8
coph <- vector("list", length(rank_universe))  # one result slot per rank
cls <- vector("list", length(rank_universe))   # one result slot per rank
# the shortened version, keeping only sites present in reader_sites$modId
sm_short <- filter_site_and_genes(sm, reader_sites$modId)
# set the parallel params
.assign_metagene <- function(m){

library(methods)
library(readr)
library(Matrix)
library(foreach)
library(iterators)
library(doParallel)
library(dplyr)

# this is a universal script for doParallel;
# never forget to stop the clusters at the end of the main script.
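That reminder is worth spelling out: a generic doParallel skeleton (a sketch, not the gist itself) registers a cluster, runs the foreach loop, and always stops the cluster afterwards.

# Generic doParallel skeleton (illustrative sketch, not the gist's code)
library(foreach)
library(doParallel)

no_cores <- 4                      # assumed core count, for illustration only
cl <- makeCluster(no_cores)        # spawn the worker processes
registerDoParallel(cl)             # make them the foreach backend

res <- foreach(i = 1:10, .combine = c) %dopar% {
  i^2                              # replace with the real per-task work
}

stopCluster(cl)                    # never forget this at the end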
@hurrialice
hurrialice / prince_run.R
Created March 13, 2018 13:19
R crash again
## ---------- params set -------------
# number of cores to use with foreach
no_cores <- 10
# relative importance (higher = more weight on prior info)
alpha = 0.5

## ----------- load dataset --------------
# load dataset and assign variables
W <- readRDS("string_ppi.rds")
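prince_run.R appears to set up a PRINCE-style network propagation over the STRING PPI matrix W. One update step consistent with the comment above (higher alpha means more weight on the prior) might look like the sketch below, where Y (prior scores), Fcur (current scores), and Wn (normalized network) are assumed names, not variables from the gist.

# Illustrative sketch of one propagation step (assumed names, not the gist's code)
library(Matrix)

# symmetric degree normalization: D^{-1/2} W D^{-1/2}
normalize_adj <- function(W) {
  d <- rowSums(W)
  Dm <- Diagonal(x = 1 / sqrt(pmax(d, .Machine$double.eps)))
  Dm %*% W %*% Dm
}

# one update step; Y = prior scores, Fcur = current scores, Wn = normalize_adj(W)
propagate_step <- function(Fcur, Y, Wn, alpha = 0.5) {
  # higher alpha puts more weight on the prior information, per the comment above
  as.vector((1 - alpha) * (Wn %*% Fcur) + alpha * Y)
}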