Skip to content

Instantly share code, notes, and snippets.

x = np.linspace(0.0001,0.999,100)
LogL = n2*np.log(x) + (n1)*np.log(1-x)
max_x = scipy.optimize.fmin(lambda x: -(n2*np.log(x) + (n1)*np.log(1-x)), 0)
print (f'Maximum value of p={max_x}')
plt.plot(x,LogL)
plt.xlabel(r"$\Theta$")
plt.ylabel('Log Likelihood function')
plt.show()
max_x = scipy.optimize.fmin(lambda x: -(x**n2 * (1-x)**n1), 0)
print (f'The likelihood function is maximum at {max_x}')
# n samples from the distribution
n = 100
sample = rv.rvs(n)
print (sample)
# We will define n1 and n2 as the frequency of T and H respectively
n1 = sum(sample == 0)
n2 = n - n1
# We can now generate a likelihood function given the observation in terms of the unknown theta
from scipy.stats import bernoulli
import scipy
import numpy as np
import matplotlib.pyplot as plt
p = 0.3
rv = bernoulli(p) # Initiating a Bernoulli process with a probability p
n,m = len(first_weight), len(others_weight)
rand_meandiff = []
for _ in range(1000):
pool = pd.concat((first_weight, others_weight))
pool = pool.sample(frac=1).reset_index(drop=True)
group1 = pool.iloc[:n]
group2 = pool.iloc[n:]
rand_meandiff.append(abs(np.mean(group1) - np.mean(group2)))
pval = sum(rand_meandiff > actual_diff)/1000
import pandas as pd
first_weight = pd.read_csv('Firstborn_weight.csv')
others_weight = pd.read_csv('Othersborn_weight.csv')
print (f'Mean weight of first born babies={first_weight.mean()[0]:0.3f} lb')
print (f'Mean weight of other born babies={others_weight.mean()[0]:0.3f} lb')
actual_diff = abs(first_weight.mean()[0] - others_weight.mean()[0])
def calc_stats(n, iters=1000):
samplestat = []
for _ in range(iters):
samplestat.append(np.median(np.random.choice(sampleddata, n, replace=True)))
samp_stat_mean = np.mean(samplestat)
std_err = np.std(samplestat)
conf_int = np.percentile(samplestat, [2.5,97.5])
return samp_stat_mean,std_err,conf_int,samplestat
estimates = calc_stats(100,1000)
plt.plot(np.linspace(10,500,20),conf1, label='Lower limit')
plt.plot(np.linspace(10,500,20),conf2, label='Upper limit')
plt.xlabel('Number of sampled values from the dataset')
plt.ylabel('95% confidence interval')
plt.legend()
plt.show()
std_err = []
conf1 = []
conf2 = []
for n in np.linspace(10,500,20):
estimates = calc_stats(int(n))
std_err.append(estimates[1])
conf1.append(estimates[2][0])
conf2.append(estimates[2][1])
plt.plot(np.linspace(10,500,20),std_err)
def calc_stats(n, iters=1000):
samplestat = []
for _ in range(iters):
samplestat.append(np.mean(np.random.choice(sampleddata, n, replace=True)))
samp_stat_mean = np.mean(samplestat)
std_err = np.std(samplestat)
conf_int = np.percentile(samplestat, [2.5,97.5])
return samp_stat_mean,std_err,conf_int,samplestat
estimates = calc_stats(100,1000)