This Snakemake Enhancement Proposal (SEP) suggests several enhancements to Snakemake's logging and report-generation capabilities.
Bioinformaticians often have an enormous library of homemade scripts
import pandas as pd | |
# Input locations: absolute paths as written by the original author.
mir_miR_correspondence = "/local/home/annata/mirna.mature.offset0.txt"
mirna_example_file = "/local/home/annata/SHORTREADS/OFFCONTROL/offcontrol-start/Demux.SRhi10002.Adipocyte%20-%20omental%2c%20donor3.SRhi10002_hg19.11475-119C8.GTGAAA.fastq.gz.filter.shortreads"

### READ FILES
# The file is read as a whitespace-delimited table without a header row; the
# ten column names are supplied explicitly and the first column ("id1")
# becomes the index.  The separator is a raw string so that the regex
# backslash is not interpreted as an (invalid) string escape.
mirna_df = pd.read_table(
    mirna_example_file,
    sep=r"\s+",
    header=None,
    names="id1 id2 nb1 mirna_seq score nb2 short_read_seq type end offset".split(),
    index_col=0,
)
# Group label for each of the four samples: two in "A", two in "B".
grouping <- c("A", "A", "B", "B")
# Design matrix built from the grouping factor (intercept plus group term).
design <- model.matrix(~factor(grouping))
#############################################################
### csaw, with its combined window methodology.
############################################################
# Read-loading parameters with dedup switched off.
# NOTE(review): presumably csaw::readParam, where dedup=FALSE keeps reads
# marked as duplicates -- confirm against the csaw documentation.
xparam <- readParam(dedup=FALSE)
" switchTab(n): invoke the 'goToTab' runtime action with index n - 1.
switchTab(n) -> {{ RUNTIME('goToTab', {index: n - 1}); }}
" Number keys 1-9 each call switchTab with the matching number.
map 1 :call switchTab(1)<CR>
map 2 :call switchTab(2)<CR>
map 3 :call switchTab(3)<CR>
map 4 :call switchTab(4)<CR>
map 5 :call switchTab(5)<CR>
map 6 :call switchTab(6)<CR>
map 7 :call switchTab(7)<CR>
map 8 :call switchTab(8)<CR>
map 9 :call switchTab(9)<CR>
import pandas as pd | |
# A combined P-value was computed for each peak cluster using Simes’ method
# (19). For a cluster containing n windows, the combined P-value is defined as
#   $p_s = \min\{\, n \, p_{(r)} / r : r = 1, 2, \dots, n \,\}$,
# where the $p_{(r)}$ are the individual window P-values sorted
# in increasing order. This provides weak control of the family-wise error rate
# across the set of null hypotheses for all windows in the cluster. In other
# words, $p_s$ represents evidence against the global null hypothesis, i.e. that
# no windows in the cluster are DB.
{ | |
"global": { | |
"check_for_updates_on_startup": true, | |
"show_in_menu_bar": true, | |
"show_profile_name_in_menu_bar": false | |
}, | |
"profiles": [ | |
{ | |
# Works on very large datasets. | |
import pandas as pd | |
# Optionally restrict MKL to a single thread.  The mkl module is not required:
# when it cannot be imported the script simply continues without the limit.
try:
    import mkl
except ImportError:
    # Only a missing module is tolerated; other errors (and Ctrl-C) propagate
    # instead of being silently swallowed by a bare ``except``.
    pass
else:
    mkl.set_num_threads(1)
# wget http://big.databio.org/example_data/AIList/AIListTestData.tgz | |
nrows = 1.5e6 | |
from ncls import NCLS | |
from ailist import AIList | |
import numpy as np | |
import pandas as pd |
# ctypedef struct ailist_t: | |
# int64_t nr, mr # Number of regions | |
# interval_t *interval_list # Regions data | |
# uint32_t first, last # Record range of intervals | |
# int nc, lenC[10], idxC[10] | |
# uint32_t *maxE | |
# ... | |
# uint32_t binary_search(interval_t* As, uint32_t idxS, uint32_t idxE, uint32_t qe) nogil |
# Author: denis.engemann@gmail.com | |
# License: simplified BSD (3 clause) | |
# Note: code is based on scipy.stats.pearsonr | |
def ss(a, axis):
    """Return the sum of squares of ``a`` along ``axis``.

    Parameters
    ----------
    a : array_like
        Input values.  Converted with ``np.asanyarray`` so plain sequences
        are accepted while ndarray subclasses are passed through unchanged.
    axis : int or None
        Axis forwarded to ``np.sum``; ``None`` sums over all elements.

    Returns
    -------
    The result of ``np.sum(a * a, axis=axis)``.
    """
    a = np.asanyarray(a)
    return np.sum(a * a, axis=axis)
def compute_corr(x, y): | |
x = np.asarray(x) | |
y = np.asarray(y) |