Francesco G. Brundu fbrundu

## hyper.md

      
              1 file
            
          
              0 forks
            
          
              4 comments
            
          
              6 stars
            
          
                fbrundu
                / hyper.md
            
            
              Last active
              December 19, 2023 14:37
            
              
                Calculate hypergeometric probability with Python SciPy
              
          
    A poker hand consists of 5 cards dealt at random without replacement from a standard deck of 52 cards of which 26 are red and the rest black. A poker hand is dealt. Find the chance that the hand contains three red cards and two black cards.
To compute this probability, we use the [hypergeometric][1] probability mass function.
We want 3 cards from the set of 26 red cards and 2 from the set of 26 black cards. So the parameters for the hypergeometric function are:
# Hypergeometric parameters, assigned in this order:
#   M - total number of cards in the deck
#   n - number of Type I cards in the deck (e.g. red cards)
#   N - number of draws (5 cards dealt in one poker hand)
#   k - number of Type I cards we want in one hand
M, n, N, k = 52, 26, 5, 3


## check_unique.sh
# Report whether every field on one line of data.csv is distinct.
# The awk condition NR==5 selects the line to inspect: put the wanted line number after ==.
# Replace \\t with another delimiter to split the line on something other than tab.
words_count=$(awk 'NR==5{print}' data.csv | tr \\t \\n | wc -l)
# uniq only collapses adjacent duplicates, so sort first to catch repeats anywhere on the line.
unique_words_count=$(awk 'NR==5{print}' data.csv | tr \\t \\n | sort | uniq | wc -l)

# Compare against the variable assigned above (the name must match exactly,
# otherwise the test reads an unset variable instead of the unique count).
if [[ "$words_count" -ne "$unique_words_count" ]]; then
    echo "Not unique"
else
    echo "Unique"
fi

## .vimrc
" no vi-compatible
set nocompatible

" Interpreter paths, built from $HOME, for the Python 2 and Python 3 hosts.
" NOTE(review): these look like the Neovim provider variables, and the
" 'neovim2'/'neovim3' pyenv environments must exist at these paths - confirm.
let g:python_host_prog=$HOME.'/.pyenv/versions/neovim2/bin/python'
let g:python3_host_prog=$HOME.'/.pyenv/versions/neovim3/bin/python'

" Setting up Vundle - the vim plugin bundler
" iCanHazVundle starts at 1; vundle_readme holds the expanded path of Vundle's
" README, and the conditional below is entered when that file is not readable.
" The body of the conditional lies outside this excerpt.
let iCanHazVundle=1
let vundle_readme=expand('~/.vim/bundle/vundle/README.md')
if !filereadable(vundle_readme)

## jprob_cmatrix.py
import numpy as np
import pandas as pd

# read the matrix file; the first column supplies the row labels
mat = pd.read_table('matrix.txt', index_col=0)

# distinct values present anywhere in the matrix, with NaN filtered out
classes = np.unique(mat.values)
classes = classes[np.logical_not(np.isnan(classes))]

## consensus_array.py
import pandas as pd

# load data
# Reads 'class_matrix.txt' with pandas.read_table, using the first column as the row index.
mat = pd.read_table('class_matrix.txt', index_col=0)

# initialize consensus array
# A Series sharing mat's row index, created without any data.
# NOTE(review): no dtype is passed, so the dtype is whatever pandas defaults to
# for a data-less Series - confirm it suits the values stored later.
consensus_a = pd.Series(index=mat.index)

# define the subset of columns on which to compute the consensus
# in this case all columns are used

## fastcluster_to_k.py
import fastcluster as fc
import pandas as pd
import scipy.cluster.hierarchy as sch

# path of the input matrix file
mat_path = 'matrix.txt'

# total number of clusters to obtain
k = 5
	# Report whether every field on one line of data.csv is distinct.
	# The awk condition NR==5 selects the line to inspect: put the wanted line number after ==.
	# Replace \\t with another delimiter to split the line on something other than tab.
	words_count=$(awk 'NR==5{print}' data.csv | tr \\t \\n | wc -l)
	# uniq only collapses adjacent duplicates, so sort first to catch repeats anywhere on the line.
	unique_words_count=$(awk 'NR==5{print}' data.csv | tr \\t \\n | sort | uniq | wc -l)

	# Compare against the variable assigned above (the name must match exactly,
	# otherwise the test reads an unset variable instead of the unique count).
	if [[ "$words_count" -ne "$unique_words_count" ]]; then
		echo "Not unique"
	else
		echo "Unique"
	fi
	" no vi-compatible
	set nocompatible

	" Interpreter paths, built from $HOME, for the Python 2 and Python 3 hosts.
	" NOTE(review): these look like the Neovim provider variables, and the
	" 'neovim2'/'neovim3' pyenv environments must exist at these paths - confirm.
	let g:python_host_prog=$HOME.'/.pyenv/versions/neovim2/bin/python'
	let g:python3_host_prog=$HOME.'/.pyenv/versions/neovim3/bin/python'

	" Setting up Vundle - the vim plugin bundler
	" iCanHazVundle starts at 1; vundle_readme holds the expanded path of Vundle's
	" README, and the conditional below is entered when that file is not readable.
	" The body of the conditional lies outside this excerpt.
	let iCanHazVundle=1
	let vundle_readme=expand('~/.vim/bundle/vundle/README.md')
	if !filereadable(vundle_readme)
	import numpy as np
	import pandas as pd

	# read the matrix file; the first column supplies the row labels
	mat = pd.read_table('matrix.txt', index_col=0)

	# distinct values present anywhere in the matrix, with NaN filtered out
	classes = np.unique(mat.values)
	classes = classes[np.logical_not(np.isnan(classes))]
	import pandas as pd

	# load data
	# Reads 'class_matrix.txt' with pandas.read_table, using the first column as the row index.
	mat = pd.read_table('class_matrix.txt', index_col=0)

	# initialize consensus array
	# A Series sharing mat's row index, created without any data.
	# NOTE(review): no dtype is passed, so the dtype is whatever pandas defaults to
	# for a data-less Series - confirm it suits the values stored later.
	consensus_a = pd.Series(index=mat.index)

	# define the subset of columns on which to compute the consensus
	# in this case all columns are used
	import fastcluster as fc
	import pandas as pd
	import scipy.cluster.hierarchy as sch

	# path of the input matrix file
	mat_path = 'matrix.txt'

	# total number of clusters to obtain
	k = 5