Skip to content

Instantly share code, notes, and snippets.

@HeenaR17
Created March 23, 2021 09:13
Show Gist options
  • Save HeenaR17/968274e1cf461cdf1be75f4097217dce to your computer and use it in GitHub Desktop.
Save HeenaR17/968274e1cf461cdf1be75f4097217dce to your computer and use it in GitHub Desktop.
import scipy.stats as stats
import seaborn as sns
import pandas as pd
import numpy as np
# Load seaborn's example "tips" dataset into a DataFrame.
dataset = sns.load_dataset("tips")
dataset.head()  # notebook-style preview; a bare expression shows nothing in a plain script

# 2 categorical features: cross-tabulate sex (rows) against smoker (columns).
dataset_table = pd.crosstab(dataset['sex'], dataset['smoker'])
print(dataset_table)
# Output:
# smoker  Yes  No
# sex
# Male     60  97
# Female   33  54

# Observed values: the raw cell counts of the contingency table as an array.
obs = dataset_table.values
print(obs)

# Expected values under the hypothesis that the two features are independent.
# chi2_contingency returns (statistic, p-value, dof, expected frequencies),
# so index 3 is the table of expected counts.
val = stats.chi2_contingency(dataset_table)
val
expected = val[3]
# Output: array([[59.84016393, 97.15983607],
#                [33.15983607, 53.84016393]])
# Degree of freedom for a test of independence: (rows - 1) * (columns - 1).
# The dimensions are read from the observed table instead of being hard-coded
# to 2x2, so the script keeps working for larger contingency tables.
nrows, ncol = obs.shape
df = (nrows - 1) * (ncol - 1)
print("Degree of freedom", df)
# Output: Degree of freedom 1

# Significance level used for the critical value further down.
alpha = 0.05

# Implementing chi square formula: sum over all cells of (O - E)^2 / E.
from scipy.stats import chi2
# Per-column contributions (same shape as before: one entry per column) ...
chi_sq = ((obs - expected) ** 2. / expected).sum(axis=0)
# ... then added up over every column, not just the first two.
chi_sq_statistic = chi_sq.sum()
chi_sq_statistic
# Output: 0.001934818536627623
# NOTE: this is the uncorrected Pearson statistic. scipy's chi2_contingency
# applies Yates' continuity correction by default when dof == 1, so the
# statistic it reports (val[0]) can differ from this value.
# Critical value: the (1 - alpha) quantile of the chi-square distribution
# with `df` degrees of freedom.
critical_value = chi2.ppf(q=1 - alpha, df=df)
critical_value
# Output: 3.841458820694124

# Decision rule: reject H0 (independence) when the statistic reaches the
# critical value. Strictly speaking the else-branch means "fail to reject H0"
# rather than "accept H0"; the printed wording is kept as-is.
if chi_sq_statistic >= critical_value:
    print("Reject Null Hypothesis; there is a relationship between the variables")
else:
    print("Accept Null Hypothesis; no relationship between the variables")
# Output: Accept Null Hypothesis; no relationship between the variables
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment