This Snakemake Enhancement Proposal (SEP) suggests several enhancements to Snakemake's logging and report-generating capabilities.
Bioinformaticians often have an enormous library of homemade scripts.
| import pandas as pd | |
| mir_miR_correspondence = "/local/home/annata/mirna.mature.offset0.txt" | |
| mirna_example_file = "/local/home/annata/SHORTREADS/OFFCONTROL/offcontrol-start/Demux.SRhi10002.Adipocyte%20-%20omental%2c%20donor3.SRhi10002_hg19.11475-119C8.GTGAAA.fastq.gz.filter.shortreads" | |
| ### READ FILES | |
| mirna_df = pd.read_table(mirna_example_file, sep="\s+", header=None, | |
| names="id1 id2 nb1 mirna_seq score nb2 short_read_seq type end offset".split(), index_col=0) |
| grouping <- c("A","A","B","B") | |
| design<-model.matrix(~factor(grouping)) | |
| ############################################################# | |
| ### csaw, with its combined window methodology. | |
| ############################################################ | |
| xparam <- readParam(dedup=FALSE) |
| switchTab(n) -> {{ RUNTIME('goToTab', {index: n - 1}); }} | |
| map 1 :call switchTab(1)<CR> | |
| map 2 :call switchTab(2)<CR> | |
| map 3 :call switchTab(3)<CR> | |
| map 4 :call switchTab(4)<CR> | |
| map 5 :call switchTab(5)<CR> | |
| map 6 :call switchTab(6)<CR> | |
| map 7 :call switchTab(7)<CR> | |
| map 8 :call switchTab(8)<CR> | |
| map 9 :call switchTab(9)<CR> |
| import pandas as pd | |
| # A combined P-value was computed for each peak cluster using Simes’ method | |
| # (19). For a cluster containing n windows, the combined P-value is defined as | |
| # p_s = min{ n * p_(r) / r ; r = 1, 2, …, n }, where the p_(r) are the individual window P-values sorted | |
| # in increasing order. This provides weak control of the family-wise error rate | |
| # across the set of null hypotheses for all windows in the cluster. In other | |
| # words, p_s represents evidence against the global null hypothesis, i.e. that | |
| # no windows in the cluster are DB. |
| { | |
| "global": { | |
| "check_for_updates_on_startup": true, | |
| "show_in_menu_bar": true, | |
| "show_profile_name_in_menu_bar": false | |
| }, | |
| "profiles": [ | |
| { | |
| # Works on very large datasets. | |
| import pandas as pd | |
| try: | |
| import mkl | |
| mkl.set_num_threads(1) | |
| except: | |
| pass |
| # wget http://big.databio.org/example_data/AIList/AIListTestData.tgz | |
| nrows = 1.5e6 | |
| from ncls import NCLS | |
| from ailist import AIList | |
| import numpy as np | |
| import pandas as pd |
| # ctypedef struct ailist_t: | |
| # int64_t nr, mr # Number of regions | |
| # interval_t *interval_list # Regions data | |
| # uint32_t first, last # Record range of intervals | |
| # int nc, lenC[10], idxC[10] | |
| # uint32_t *maxE | |
| # ... | |
| # uint32_t binary_search(interval_t* As, uint32_t idxS, uint32_t idxE, uint32_t qe) nogil |
| # Author: denis.engemann@gmail.com | |
| # License: simplified BSD (3 clause) | |
| # Note: code is based on scipy.stats.pearsonr | |
| def ss(a, axis): | |
| return np.sum(a * a, axis=axis) | |
| def compute_corr(x, y): | |
| x = np.asarray(x) | |
| y = np.asarray(y) |