Last active
August 8, 2018 11:59
Helper functions..
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SDSP = Sampling Distribution of Sample Proportions
# This is a helper file for programmatic illustrations of SDSP concepts.
from random import shuffle | |
import pandas as pd | |
def create_bernoulli_population(N, p):
    """
    Build a shuffled population of Bernoulli outcomes.

    N - Population size, eg N=10000
    p - probability of the outcome of interest, 0 <= p <= 1

    Returns a shuffled list of 1s and 0s containing exactly int(N) items.
    1 - indicates the outcome of interest, 0 - otherwise.
    The number of 1s is p*N rounded to the nearest integer.

    Raises ValueError if p is outside [0, 1].
    """
    if not 0 <= p <= 1:
        raise ValueError("p must be a probability between 0 and 1, got %r" % (p,))

    size = int(N)
    # Round once and derive the remainder from the total. Truncating p*N and
    # (1-p)*N independently loses members to float error, e.g. N=100, p=0.29
    # gives int(28.999...) + int(71.0) = 99 items instead of 100.
    n_interested = int(round(p * size))
    n_others = size - n_interested

    population = [1] * n_interested + [0] * n_others
    shuffle(population)
    return population
def get_frequency_df(raw_list):
    """
    Given a raw list, this provides the frequency of each distinct item along
    with its relative frequency (probability).

    Eg:
        x    n(x)    p(x)
        0    4000    0.4
        1    6000    0.6

    If you assume 1 indicates, say a yellow ball, 0 otherwise, then there are 6000 yellow balls
    in given population list, so p(yellow_balls) = 0.6

    raw_list - iterable of hashable items

    Returns a pandas DataFrame with columns 'x', 'n(x)' and 'p(x)', sorted by 'x'.
    """
    from collections import Counter

    # Single pass over the data; calling raw_list.count() for every element
    # would be quadratic in the population size.
    counts = Counter(raw_list)

    freq_df = pd.DataFrame(
        {'x': list(counts.keys()), 'n(x)': list(counts.values())},
        columns=['x', 'n(x)'],
    )
    total = freq_df['n(x)'].sum()
    freq_df['p(x)'] = freq_df['n(x)'] / total
    return freq_df.sort_values('x')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment