Tobias "Tobi" Schraink tobsecret

## gist:9fc6cc0f6cad771dcce6f689e13d43b7
// Gather every sample's ID and its read files into one channel emission.
// The channel is created with Channel.from and assigned, and the sample IDs
// are quoted strings so they do not resolve as unbound variables.
some_channel = Channel.from(
    ['sample1', [file('s1_read1.fq'), file('s1_read2.fq')]],
    ['sample2', [file('s2_read1.fq'), file('s2_read2.fq')]]  // second mate is read2, not read1 twice
)
some_channel
    .map { ['dummykey', it.collate(1)] }  // wrap each element in its own list and tag with a constant key
    .groupTuple()                         // the shared key gathers all samples into a single item
    .map { it[1].transpose() }            // drop the key and regroup: IDs together, file lists together
    .map { it.collect { it.flatten() } }
    .view() // [[sample1, sample2], [[s1_read1.fq, s1_read2.fq], [s2_read1.fq, s2_read2.fq]]]

## gist:f5012255e22ee344fa4a55236d1f8758
# Look up run info for the accessions listed in SRA_ids.txt and keep the
# Run and spots fields of every Row in spots.tsv.
epost -db sra -input SRA_ids.txt -format acc |
    esummary -format runinfo -mode xml |
    xtract -pattern Row -element Run,spots > spots.tsv
# Summary statistics over the second column (the spot counts).
python -c "import pandas as pd; print(pd.read_csv('spots.tsv', sep='\t', header=None).iloc[:,1].agg(['median', 'mean', 'std', 'min', 'max']))"

## pipeline.nf
// Let's say we want to try out the first three chromosomes.
chroms = ['chr1', 'chr2', 'chr3']
// Pair every sample name with the full chromosome list.
samples = Channel.from(
    ['n1', chroms],
    ['n2', chroms]
)
// Appending `.subscribe { println it }` to the channel would print:
//   [n1, [chr1, chr2, chr3]]
//   [n2, [chr1, chr2, chr3]]

## pipeline.nf
// Example: fold a channel of [key, file] pairs into a single map.
samples = Channel.from(
    ['1', file('1')],
    ['1_1', file('1_1')],
    ['1_2', file('1_2')],
    ['1_2_1', file('1_2_1')]
)
samples
    .reduce([:]) { acc, pair ->
        // first element is the key, second the file
        acc[pair[0]] = pair[1]
        return acc
    }
    .println()
//prints: [1:/home/user/1, 1_1:/home/user/1_1, 1_2:/home/user/1_2, 1_2_1:/home/user/1_2_1]

## gist:7e02c73951bf17547219ca8956fed8b6
from scipy.stats import hypergeom

cards_drawn_by_Book_of_Specters = 3

# For every creature count from 10 to 25, print the probability that more
# than one of the drawn cards is a creature. hypergeom.sf(1, M, n, N) is
# P(X > 1) for N draws from M cards of which n are creatures.
for no_creatures in range(10, 26):
    print(
        'Number of creatures in deck on turn two: ', no_creatures,
        '\nprobability of drawing at least two cards on turn two on the play: ',
        hypergeom.sf(
            1,
            25,  # 25 cards remain in deck on turn two on the play
            no_creatures,
            cards_drawn_by_Book_of_Specters,
        ),
    )

## Old_Working_env_conda_list.txt
>conda list
# packages in environment at C:\Users\Tobias\Miniconda3\envs\DaskTutorial:
#
bleach                    1.5.0                    py35_0
bokeh                     0.12.5                   py35_0
bzip2                     1.0.6                    vc14_3  [vc14]
chest                     0.2.3                    py35_0
click                     6.7                      py35_0
cloudpickle               0.2.2                    py35_0
colorama                  0.3.9                    py35_0    conda-forge

## children.py
from decimal import Decimal
from itertools import permutations

#Brute-forcing all solutions from this article:
#https://www.theguardian.com/science/alexs-adventures-in-numberland/2015/may/20/can-you-do-the-maths-puzzle-for-vietnamese-eight-year-olds-that-has-stumped-parents-and-teachers
#Fill in the numbers from 1-9 (each number can only be used once) in the below equation to solve it:
#a + 13 * b / c + d + 12 * e - f - 11 + g * h / i - 10 == 66

def solution_correct(lst):
    #takes in an iterable and checks if it solves the equation when filled into the equation in reverse order:

## hvLayoutHolomapsError.py
import pandas as pd
import holoviews as hv; hv.extension('bokeh')

#Our dataset is in long form and contains data for four categories.
#For each time point and each category, we have a mean and a standard error ('sem') value.
#Getting this kind of dataframe from your own dataframe, with multiple observations per category per time point,
#is very doable. Let's assume your longformdf has three columns: ['timepoint', 'category', 'value']
#ds = longformdf.groupby(by=['timepoint','category']).agg(['mean', 'sem']).loc[:,'value'].reset_index()

ds = pd.DataFrame({'category': {0: 1.0,  1: 2.0,  2: 3.0,  3: 4.0,  4: 1.0,  5: 2.0,  6: 3.0,  7: 4.0,  8: 1.0,  9: 2.0,  10: 3.0,  11: 4.0},

## Hv.CategoricalWithErrors.py
import pandas as pd
import holoviews as hv; hv.extension('bokeh')

#Our dataset is in long form and contains data for four categories.
#For each time point and each category, we have a mean and a standard error ('sem') value.
#Getting this kind of dataframe from your own dataframe, with multiple observations per category per time point,
#is very doable. Let's assume your longformdf has three columns: ['timepoint', 'category', 'value']
#ds = longformdf.groupby(by=['timepoint','category']).agg(['mean', 'sem']).loc[:,'value'].reset_index()

ds = pd.DataFrame({'category': {0: 1.0,  1: 2.0,  2: 3.0,  3: 4.0,  4: 1.0,  5: 2.0,  6: 3.0,  7: 4.0,  8: 1.0,  9: 2.0,  10: 3.0,  11: 4.0},
	// Gather every sample's ID and its read files into one channel emission.
	// The channel is created with Channel.from and assigned, and the sample IDs
	// are quoted strings so they do not resolve as unbound variables.
	some_channel = Channel.from(
	    ['sample1', [file('s1_read1.fq'), file('s1_read2.fq')]],
	    ['sample2', [file('s2_read1.fq'), file('s2_read2.fq')]]  // second mate is read2, not read1 twice
	)
	some_channel
	    .map { ['dummykey', it.collate(1)] }  // wrap each element in its own list and tag with a constant key
	    .groupTuple()                         // the shared key gathers all samples into a single item
	    .map { it[1].transpose() }            // drop the key and regroup: IDs together, file lists together
	    .map { it.collect { it.flatten() } }
	    .view() // [[sample1, sample2], [[s1_read1.fq, s1_read2.fq], [s2_read1.fq, s2_read2.fq]]]
	# Look up run info for the accessions listed in SRA_ids.txt and keep the
	# Run and spots fields of every Row in spots.tsv. The pipes must be real
	# (unescaped) pipes; `\|` would be passed to epost as a literal argument.
	epost -db sra -input SRA_ids.txt -format acc | esummary -format runinfo -mode xml | xtract -pattern Row -element Run,spots > spots.tsv
	# Summary statistics over the second column (the spot counts).
	python -c "import pandas as pd; print(pd.read_csv('spots.tsv', sep='\t', header=None).iloc[:,1].agg(['median', 'mean', 'std', 'min', 'max']))"
	// Let's say we want to try out the first three chromosomes.
	chroms = ['chr1', 'chr2', 'chr3']
	// Pair every sample name with the full chromosome list.
	samples = Channel.from(
	    ['n1', chroms],
	    ['n2', chroms]
	)
	// Appending `.subscribe { println it }` to the channel would print:
	//   [n1, [chr1, chr2, chr3]]
	//   [n2, [chr1, chr2, chr3]]
	// Example: fold a channel of [key, file] pairs into a single map.
	samples = Channel.from(
	    ['1', file('1')],
	    ['1_1', file('1_1')],
	    ['1_2', file('1_2')],
	    ['1_2_1', file('1_2_1')]
	)
	samples
	    .reduce([:]) { acc, pair ->
	        // first element is the key, second the file
	        acc[pair[0]] = pair[1]
	        return acc
	    }
	    .println()
	//prints: [1:/home/user/1, 1_1:/home/user/1_1, 1_2:/home/user/1_2, 1_2_1:/home/user/1_2_1]
	from scipy.stats import hypergeom

	cards_drawn_by_Book_of_Specters=3

	for no_creatures in range(10, 26):
	print(
	'Number of creatures in deck on turn two: ', no_creatures,
	'\nprobability of drawing at least two cards on turn two on the play: ',
	hypergeom.sf(1, 25, #25 cards remain in deck on turn two on the play
	no_creatures, cards_drawn_by_Book_of_Specters),
	>conda list
	# packages in environment at C:\Users\Tobias\Miniconda3\envs\DaskTutorial:
	#
	bleach 1.5.0 py35_0
	bokeh 0.12.5 py35_0
	bzip2 1.0.6 vc14_3 [vc14]
	chest 0.2.3 py35_0
	click 6.7 py35_0
	cloudpickle 0.2.2 py35_0
	colorama 0.3.9 py35_0 conda-forge
	from decimal import Decimal
	from itertools import permutations

	#Brute-forcing all solutions from this article:
	#https://www.theguardian.com/science/alexs-adventures-in-numberland/2015/may/20/can-you-do-the-maths-puzzle-for-vietnamese-eight-year-olds-that-has-stumped-parents-and-teachers
	#Fill in the numbers from 1-9 (each number can only be used once) in the below equation to solve it:
	#a + 13 * b / c + d + 12 * e - f - 11 + g * h / i - 10 == 66

	def solution_correct(lst):
	#takes in an iterable and checks if it solves the equation when filled into the equation in reverse order:
	import pandas as pd
	import holoviews as hv; hv.extension('bokeh')

	#Our dataset is in long form and contains data for four categories.
	#For each time point and each category, we have a mean and a standard error ('sem') value.
	#Getting this kind of dataframe from your own dataframe, with multiple observations per category per time point,
	#is very doable. Let's assume your longformdf has three columns: ['timepoint', 'category', 'value']
	#ds = longformdf.groupby(by=['timepoint','category']).agg(['mean', 'sem']).loc[:,'value'].reset_index()

	ds = pd.DataFrame({'category': {0: 1.0, 1: 2.0, 2: 3.0, 3: 4.0, 4: 1.0, 5: 2.0, 6: 3.0, 7: 4.0, 8: 1.0, 9: 2.0, 10: 3.0, 11: 4.0},