Francesco G. Brundu fbrundu

## fastcluster_to_k.py
import fastcluster as fc
import pandas as pd
import scipy.cluster.hierarchy as sch

# define the total number of clusters to obtain
k = 5

# define matrix path
mat_path = 'matrix.txt'

## consensus_array.py
import pandas as pd

# load data
mat = pd.read_table('class_matrix.txt', index_col=0)

# initialize consensus array
consensus_a = pd.Series(index=mat.index)

# define columns subset on which compute consensus
# in this case all columns are used

## jprob_cmatrix.py
import numpy as np
import pandas as pd

# load data
# (the first column of the file is used as the row index)
mat = pd.read_table('matrix.txt', index_col=0)

# get classes: the distinct values appearing anywhere in the matrix
classes = np.unique(mat.values)
# drop NaN so that missing entries are not counted as a class
# NOTE(review): np.isnan only accepts numeric input -- this assumes the
# matrix holds numeric class labels; confirm against the data
classes = classes[~np.isnan(classes)]

## .vimrc
" no vi-compatible
set nocompatible

let g:python_host_prog=$HOME.'/.pyenv/versions/neovim2/bin/python'
let g:python3_host_prog=$HOME.'/.pyenv/versions/neovim3/bin/python'

" Setting up Vundle - the vim plugin bundler
let iCanHazVundle=1
let vundle_readme=expand('~/.vim/bundle/vundle/README.md')
if !filereadable(vundle_readme)

## check_unique.sh
# Check whether the tab-separated words on one line of data.csv are all distinct.
# line number after == (NR==5 selects the 5th line)
# replace \\t with the delimiter you want to use instead of tab for splitting into words
words_count=$(awk 'NR==5{print}' data.csv | tr \\t \\n | wc -l)
# sort -u drops every duplicate; plain uniq only collapses adjacent repeats
unique_words_count=$(awk 'NR==5{print}' data.csv | tr \\t \\n | sort -u | wc -l)

# both operands must name the variables assigned above: an unset name would
# evaluate as 0 here and make every non-empty line look "Not unique"
if [[ "$words_count" -ne "$unique_words_count" ]]; then
    echo "Not unique"
else
    echo "Unique"

## hyper.md

      
              1 file
            
          
              0 forks
            
          
              4 comments
            
          
              6 stars
            
          
                fbrundu
                / hyper.md
            
            
              Last active
              December 19, 2023 14:37
            
              
                Calculate hypergeometric probability with Python SciPy
              
          
    A poker hand consists of 5 cards dealt at random without replacement from a standard deck of 52 cards of which 26 are red and the rest black. A poker hand is dealt. Find the chance that the hand contains three red cards and two black cards.
To achieve it, we use the [hypergeometric][1] probability mass function.
We want 3 cards from the set of 26 red cards and 2 from the set of 26 black cards. So the parameters for the hypergeometric function are:
M = 52  # Total number of cards
n = 26  # Number of Type I cards (e.g. red cards) 
N = 5   # Number of draws (5 cards dealt in one poker hand)
k = 3   # Number of Type I cards we want in one hand


## binom.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              7 stars
            
          
                fbrundu
                / binom.md
            
            
              Last active
              February 17, 2021 18:12
            
              
                Calculate binomial probability in Python with SciPy
              
          
    If you bet on "red" at roulette, you have chance 18/38 of winning. Suppose you make a sequence of independent bets on “red” at roulette, with the decision that you will stop playing once you have won 5 times. What is the chance that after 15 bets you are still playing?
We use the [binomial][1] probability mass function. You are still playing after 15 bets only if you have won at most 4 of them. So the final probability is the sum of the probability of getting 0 successful bets in 15 bets, plus the probability of getting 1 successful bet, ..., up to the probability of getting 4 successful bets in 15 bets.
To achieve it:
import scipy.stats as ss

n = 15         # Number of total bets

p = 18./38 # Probability of getting "red" at the roulette

  
## tsv2gct.py
import pandas as pd
import sys
import glob
import os

# input / output directory
input_dir = sys.argv[1]
# input file extension
input_ext = sys.argv[2]
# cardinality of index columns (rownames)

## l2r_fsel_srs.py
import pandas as pd
import numpy as np
import sys
import random as rnd

csv = sys.argv[1]
out = sys.argv[2]

df = pd.read_table(csv, sep='\t', index_col=0)

## tcga_correct_samplenames.py
import pandas as pd
import sys
import re

tcga_tsv = sys.argv[1]

tcga = pd.read_table(tcga_tsv, sep='\t', index_col=0)

oldcolumns = tcga.columns.tolist()
newcolumns = ['-'.join(re.findall(r'TCGA[^_]*', oc)[0].split('-')[:4])
	import fastcluster as fc
	import pandas as pd
	import scipy.cluster.hierarchy as sch

	# define total number of cluster to obtain
	k = 5

	# define matrix path
	mat_path = 'matrix.txt'
	import pandas as pd

	# load data
	mat = pd.read_table('class_matrix.txt', index_col=0)

	# initialize consensus array
	consensus_a = pd.Series(index=mat.index)

	# define columns subset on which compute consensus
	# in this case all columns are used
	import numpy as np
	import pandas as pd

	# load data
	mat = pd.read_table('matrix.txt', index_col=0)

	# get classes
	classes = np.unique(mat.values)
	classes = classes[~np.isnan(classes)]
	" no vi-compatible
	set nocompatible

	let g:python_host_prog=$HOME.'/.pyenv/versions/neovim2/bin/python'
	let g:python3_host_prog=$HOME.'/.pyenv/versions/neovim3/bin/python'

	" Setting up Vundle - the vim plugin bundler
	let iCanHazVundle=1
	let vundle_readme=expand('~/.vim/bundle/vundle/README.md')
	if !filereadable(vundle_readme)
	# line number after ==
	# change \\t with the delimiter you want to use instead of tab for splitting to words
	words_count=$(awk 'NR==5{print}' data.csv \| tr \\t \\n \| wc -l)
	unique_words_count=$(awk 'NR==5{print}' data.csv \| tr \\t \\n \| uniq \| wc -l)

	if [[ "$words_count" -ne "$unique_words_counts" ]]; then
	echo "Not unique"
	else
	echo "Unique"
	import pandas as pd
	import sys
	import glob
	import os

	# input / output directory
	input_dir = sys.argv[1]
	# input file extension
	input_ext = sys.argv[2]
	# cardinality of index columns (rownames)
	import pandas as pd
	import sys
	import re

	tcga_tsv = sys.argv[1]

	tcga = pd.read_table(tcga_tsv, sep='\t', index_col=0)

	oldcolumns = tcga.columns.tolist()
	newcolumns = ['-'.join(re.findall(r'TCGA[^_]*', oc)[0].split('-')[:4])