Skip to content

Instantly share code, notes, and snippets.

@sachinsdate
Created July 11, 2021 16:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sachinsdate/ad8c39d261df976d805633c782b2e7fc to your computer and use it in GitHub Desktop.
Save sachinsdate/ad8c39d261df976d805633c782b2e7fc to your computer and use it in GitHub Desktop.
Calculation of the interval estimate for the population mean at a specified confidence level
import math
import matplotlib.pyplot as plt
from scipy.stats import invweibull
from scipy.stats import norm
import numpy as np
import pandas as pd
#Load the data file
df = pd.read_csv('DOHMH_Beach_Water_Quality_Data.csv', header=0, infer_datetime_format=True, parse_dates=['Sample_Date'])
#filter out the data for our beach of interest, which is the MIDLAND BEACH
df_midland = df[df['Beach_Name']=='MIDLAND BEACH']
#print the data frame
print(df_midland)
#replace all these NaNs with 0
df_midland.fillna(value=0,inplace=True)
#print out the summary statistics for the sample
df_midland['Enterococci_Results'].describe()
#print out one more statistic which is the most frequently occurring value a.k.a. the mode
print(df_midland['Enterococci_Results'].mode())
#The following plot shows the frequency distribution of sample values:
plt.hist(df_midland['Enterococci_Results'], bins=100)
plt.xlabel('Number of Enterococci detected in the sample')
plt.ylabel('Number of samples')
plt.show()
#Calculate the interval estimate for the population mean mu
#sample size n
n = len(df_midland['Enterococci_Results'])
#sample mean Y
Y = df_midland['Enterococci_Results'].mean()
#sample standard deviation
S = df_midland['Enterococci_Results'].std()
#significance alpha (1-alpha)*100 = 95%
alpha = 0.05
#p-value for required alpha
p = alpha / 2
#z value for the specified p-value
z_p=norm.ppf(1-p)
#mu_low
mu_low = Y-z_p*S/math.sqrt(n)
#mu_high
mu_high = Y+z_p*S/math.sqrt(n)
print('95% Confidence intervals for the population mean (mu)='+str((mu_low, mu_high)))
#############################################################################
#plot the pdf of the inverse Weibull distribution.
fig = plt.figure()
fig.suptitle('Probability Density Function f(x)')
plt.xlabel('x')
plt.ylabel('Probability density')
c = 100
x = np.linspace(invweibull.ppf(0.00000001, c), invweibull.ppf(0.999999999, c), 10000)
y = invweibull.pdf(x, c)
plt.plot(x, y, 'r-', linewidth=1, alpha=0.6, color='black')
mu_l = 0.99
mu_h = 1.035
#shaded_x = x[np.logical_and(x >= mu_l, x <= mu_h)]
#plt.fill_between(shaded_x, invweibull.pdf(shaded_x, c), color='blue', alpha=0.65, linewidth=0)
shaded_x_low = x[np.logical_and(x >= 0, x <= mu_l)]
plt.fill_between(shaded_x_low, invweibull.pdf(shaded_x_low, c), color='blue', alpha=0.65, linewidth=0)
shaded_x_high = x[np.logical_and(x >= mu_h, x <= 10000)]
plt.fill_between(shaded_x_high, invweibull.pdf(shaded_x_high, c), color='blue', alpha=0.65, linewidth=0)
plt.show()
#############################################################################
#plot the pdf of the Normal distribution.
from scipy.stats import norm
fig = plt.figure()
fig.suptitle('Probability Density Function f(x)')
plt.xlabel('x')
plt.ylabel('Probability density')
x = np.linspace(norm.ppf(0.00000001), norm.ppf(0.999999999), 10000)
y = norm.pdf(x)
plt.plot(x, y, 'r-', linewidth=1, alpha=0.6, color='black')
z_l = -1000
z_h = -1.645
shaded_x = x[np.logical_and(x >= z_l, x <= z_h)]
plt.fill_between(shaded_x, norm.pdf(shaded_x), color='blue', alpha=0.65, linewidth=0)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment