Ajey Venkataraman ajey091

## likelihood4.py
x = np.linspace(0.0001,0.999,100)
LogL = n2*np.log(x) + (n1)*np.log(1-x)
max_x = scipy.optimize.fmin(lambda x: -(n2*np.log(x) + (n1)*np.log(1-x)), 0)
print (f'Maximum value of p={max_x}')
plt.plot(x,LogL)
plt.xlabel(r"$\Theta$")
plt.ylabel('Log Likelihood function')
plt.show()

## likelihood3.py
max_x = scipy.optimize.fmin(lambda x: -(x**n2 * (1-x)**n1), 0)
print (f'The likelihood function is maximum at {max_x}')

## likelihood2.py
# n samples from the distribution
n = 100
sample = rv.rvs(n)
print (sample)

# We will define n1 and n2 as the frequency of T and H respectively
n1 = sum(sample == 0)
n2 = n - n1

# We can now generate a likelihood function given the observation in terms of the unknown theta

## likelihood1.py
from scipy.stats import bernoulli
import scipy
import numpy as np
import matplotlib.pyplot as plt

p = 0.3
rv = bernoulli(p) # Initiating a Bernoulli process with a probability p

## stats10.py
n,m = len(first_weight), len(others_weight)
rand_meandiff = []
for _ in range(1000):
    pool = pd.concat((first_weight, others_weight))
    pool = pool.sample(frac=1).reset_index(drop=True)
    group1 = pool.iloc[:n]
    group2 = pool.iloc[n:]
    rand_meandiff.append(abs(np.mean(group1) - np.mean(group2)))

pval = sum(rand_meandiff > actual_diff)/1000

## stats9.py
import pandas as pd
first_weight = pd.read_csv('Firstborn_weight.csv')
others_weight = pd.read_csv('Othersborn_weight.csv')
print (f'Mean weight of first born babies={first_weight.mean()[0]:0.3f} lb')
print (f'Mean weight of other born babies={others_weight.mean()[0]:0.3f} lb')
actual_diff = abs(first_weight.mean()[0] - others_weight.mean()[0])

## stats8.py
def calc_stats(n, iters=1000):
    samplestat = []
    for _ in range(iters):
        samplestat.append(np.median(np.random.choice(sampleddata, n, replace=True)))
    samp_stat_mean = np.mean(samplestat)
    std_err = np.std(samplestat)
    conf_int = np.percentile(samplestat, [2.5,97.5])
    return samp_stat_mean,std_err,conf_int,samplestat

estimates = calc_stats(100,1000)

## stats7.py
plt.plot(np.linspace(10,500,20),conf1, label='Lower limit')
plt.plot(np.linspace(10,500,20),conf2, label='Upper limit')
plt.xlabel('Number of sampled values from the dataset')
plt.ylabel('95% confidence interval')
plt.legend()
plt.show()

## stats6.py
std_err = []
conf1 = []
conf2 = []
for n in np.linspace(10,500,20):
    estimates = calc_stats(int(n))
    std_err.append(estimates[1])
    conf1.append(estimates[2][0])
    conf2.append(estimates[2][1])

plt.plot(np.linspace(10,500,20),std_err)

## stats5.py
def calc_stats(n, iters=1000):
    samplestat = []
    for _ in range(iters):
        samplestat.append(np.mean(np.random.choice(sampleddata, n, replace=True)))
    samp_stat_mean = np.mean(samplestat)
    std_err = np.std(samplestat)
    conf_int = np.percentile(samplestat, [2.5,97.5])
    return samp_stat_mean,std_err,conf_int,samplestat

estimates = calc_stats(100,1000)
	x = np.linspace(0.0001,0.999,100)
	LogL = n2np.log(x) + (n1)np.log(1-x)
	max_x = scipy.optimize.fmin(lambda x: -(n2np.log(x) + (n1)np.log(1-x)), 0)
	print (f'Maximum value of p={max_x}')
	plt.plot(x,LogL)
	plt.xlabel(r"$\Theta$")
	plt.ylabel('Log Likelihood function')
	plt.show()
	max_x = scipy.optimize.fmin(lambda x: -(x*n2 (1-x)**n1), 0)
	print (f'The likelihood function is maximum at {max_x}')
	# n samples from the distribution
	n = 100
	sample = rv.rvs(n)
	print (sample)

	# We will define n1 and n2 as the frequency of T and H respectively
	n1 = sum(sample == 0)
	n2 = n - n1

	# We can now generate a likelihood function given the observation in terms of the unknown theta
	from scipy.stats import bernoulli
	import scipy
	import numpy as np
	import matplotlib.pyplot as plt

	p = 0.3
	rv = bernoulli(p) # Initiating a Bernoulli process with a probability p
	n,m = len(first_weight), len(others_weight)
	rand_meandiff = []
	for _ in range(1000):
	pool = pd.concat((first_weight, others_weight))
	pool = pool.sample(frac=1).reset_index(drop=True)
	group1 = pool.iloc[:n]
	group2 = pool.iloc[n:]
	rand_meandiff.append(abs(np.mean(group1) - np.mean(group2)))

	pval = sum(rand_meandiff > actual_diff)/1000
	import pandas as pd
	first_weight = pd.read_csv('Firstborn_weight.csv')
	others_weight = pd.read_csv('Othersborn_weight.csv')
	print (f'Mean weight of first born babies={first_weight.mean()[0]:0.3f} lb')
	print (f'Mean weight of other born babies={others_weight.mean()[0]:0.3f} lb')
	actual_diff = abs(first_weight.mean()[0] - others_weight.mean()[0])
	def calc_stats(n, iters=1000):
	samplestat = []
	for _ in range(iters):
	samplestat.append(np.median(np.random.choice(sampleddata, n, replace=True)))
	samp_stat_mean = np.mean(samplestat)
	std_err = np.std(samplestat)
	conf_int = np.percentile(samplestat, [2.5,97.5])
	return samp_stat_mean,std_err,conf_int,samplestat

	estimates = calc_stats(100,1000)
	plt.plot(np.linspace(10,500,20),conf1, label='Lower limit')
	plt.plot(np.linspace(10,500,20),conf2, label='Upper limit')
	plt.xlabel('Number of sampled values from the dataset')
	plt.ylabel('95% confidence interval')
	plt.legend()
	plt.show()
	std_err = []
	conf1 = []
	conf2 = []
	for n in np.linspace(10,500,20):
	estimates = calc_stats(int(n))
	std_err.append(estimates[1])
	conf1.append(estimates[2][0])
	conf2.append(estimates[2][1])

	plt.plot(np.linspace(10,500,20),std_err)