Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Normality assessment tools
from scipy.stats import shapiro, normaltest, anderson, norm
from statsmodels.graphics.gofplots import qqplot
import seaborn as sns
from colorama import Fore
import matplotlib.pyplot as plt
import numpy as np
# Module-level side effect: applies seaborn's default theme on import.
# color_codes=True also remaps matplotlib's shorthand color codes
# ("b", "g", "r", ...) to the colors of the seaborn palette.
sns.set(color_codes=True)
def check_shapiro(data, alpha=0.05):
    """Check normality with the Shapiro-Wilk test.

    The null hypothesis (H0) is that ``data`` was drawn from a normal
    distribution. When ``p > alpha`` the test *fails to reject* H0, so the
    sample is treated as Gaussian.

    Args:
        data: Sample to test; passed unchanged to ``scipy.stats.shapiro``.
        alpha: Significance level used as the p-value threshold.

    Returns:
        True if H0 cannot be rejected at level ``alpha``, False otherwise.
    """
    _, p_value = shapiro(data)
    # bool() keeps the result a plain Python bool even when the p-value
    # comparison yields a NumPy scalar.
    return bool(p_value > alpha)
def check_dagostino(data, alpha=0.05):
    """Check normality with D'Agostino's K^2 test.

    The null hypothesis (H0) is that ``data`` was drawn from a normal
    distribution. When ``p > alpha`` the test *fails to reject* H0, so the
    sample is treated as Gaussian.

    Args:
        data: Sample to test; passed unchanged to
            ``scipy.stats.normaltest``.
        alpha: Significance level used as the p-value threshold.

    Returns:
        True if H0 cannot be rejected at level ``alpha``, False otherwise.
    """
    _, p_value = normaltest(data)
    # bool() keeps the result a plain Python bool even when the p-value
    # comparison yields a NumPy scalar.
    return bool(p_value > alpha)
def check_anderson(data):
    """Check normality with the Anderson-Darling test.

    The null hypothesis (H0) is that ``data`` was drawn from a normal
    distribution. H0 is rejected at a given significance level when the
    test statistic exceeds that level's critical value.

    Args:
        data: Sample to test; passed unchanged to ``scipy.stats.anderson``
            with ``dist="norm"``.

    Returns:
        True if the statistic is below at least one of the first three
        critical values (i.e. H0 cannot be rejected at one of those
        levels), False otherwise.
    """
    result = anderson(data, dist="norm")
    # For dist="norm" SciPy documents the critical values as corresponding
    # to the 15%, 10%, 5%, 2.5% and 1% significance levels, in that order;
    # only the first three are considered here.
    return any(
        result.statistic < critical_value
        for critical_value in result.critical_values[:3]
    )
def is_normal(data):
    """Decide normality by majority vote of three statistical tests.

    Runs the Shapiro-Wilk, D'Agostino K^2 and Anderson-Darling checks on
    ``data`` (each with its default settings).

    Returns:
        True when at least two of the three checks report normality,
        False otherwise.
    """
    verdicts = (
        check_shapiro(data),
        check_dagostino(data),
        check_anderson(data),
    )
    votes_for_normal = verdicts.count(True)
    return votes_for_normal >= 2
def analyze_normality(df):
    """Plot and print a normality report for every numeric column of ``df``.

    A column is considered numeric when its dtype kind is one of
    ``'biufc'`` (bool, signed int, unsigned int, float, complex). For each
    such column, missing values are dropped and then:

    - a distribution plot with KDE and a fitted normal curve is drawn,
    - a Q-Q plot (statsmodels ``qqplot`` with ``line="s"``) is drawn
      beside it,
    - the verdicts of the Shapiro-Wilk, D'Agostino and Anderson-Darling
      checks are printed, colored green or red.

    A column with no values left after dropping missing ones only has its
    point count printed and is then skipped.

    Args:
        df: Table whose columns are inspected; each column must expose
            ``dtype.kind`` and ``dropna()`` (e.g. a pandas DataFrame).

    Returns:
        None. Output is produced via matplotlib figures and ``print``.
    """
    normal_label = Fore.GREEN + "NORMAL" + Fore.RESET
    not_normal_label = Fore.RED + "NO NORMAL" + Fore.RESET

    numeric_columns = [
        name for name in df.columns if df[name].dtype.kind in "biufc"
    ]
    for column in numeric_columns:
        data = df[column].dropna()
        n_points = len(data)
        print(f"{column}: {n_points} puntos")
        if n_points == 0:
            # Nothing to plot or test (e.g. an all-NaN column); the count
            # printed above already tells the user why it is skipped.
            continue

        fig, axes = plt.subplots(1, 2, figsize=(7, 5))
        # NOTE(review): sns.distplot is deprecated in newer seaborn
        # releases; migrate to histplot/displot once the minimum seaborn
        # version of this project is confirmed.
        sns.distplot(data, ax=axes[0], fit=norm)
        qqplot(data, line="s", ax=axes[1])
        plt.show()
        # Release the figure explicitly so a DataFrame with many columns
        # does not accumulate open figures on backends where show() does
        # not close them.
        plt.close(fig)

        shapiro_ok = check_shapiro(data, 0.05)
        print(f"\tSHAPIRO:\t{normal_label if shapiro_ok else not_normal_label}")
        dagostino_ok = check_dagostino(data, 0.05)
        print(f"\tD'AGOSTINO:\t{normal_label if dagostino_ok else not_normal_label}")
        anderson_ok = check_anderson(data)
        print(f"\tANDERSON:\t{normal_label if anderson_ok else not_normal_label}")
        print("\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.