marnunez/Normality.py

## Normality.py
from scipy.stats import shapiro, normaltest, anderson, norm
from statsmodels.graphics.gofplots import qqplot
import seaborn as sns
from colorama import Fore
import matplotlib.pyplot as plt
import numpy as np

# Module-level side effect: configure seaborn (with color_codes enabled) as
# soon as this module is imported, so every plot drawn below uses it.
sns.set(color_codes=True)

def check_shapiro(data, alpha=0.05):
    """Run the Shapiro-Wilk normality test on ``data``.

    H0 states that the sample was drawn from a normal distribution. When the
    p-value exceeds ``alpha`` the test FAILS to reject H0, so the sample is
    treated as Gaussian.

    Args:
        data: Sample forwarded unchanged to ``scipy.stats.shapiro``.
        alpha: Significance level the p-value is compared against.

    Returns:
        True if ``p > alpha`` (normality not rejected), False otherwise.
    """
    # Only the p-value matters here; the W statistic is discarded.
    _, p_value = shapiro(data)
    # bool() keeps the return type a plain Python bool rather than numpy.bool_.
    return bool(p_value > alpha)


def check_dagostino(data, alpha=0.05):
    """Run D'Agostino's K^2 normality test on ``data``.

    H0 states that the sample was drawn from a normal distribution. When the
    p-value exceeds ``alpha`` the test FAILS to reject H0, so the sample is
    treated as Gaussian.

    Args:
        data: Sample forwarded unchanged to ``scipy.stats.normaltest``.
        alpha: Significance level the p-value is compared against.

    Returns:
        True if ``p > alpha`` (normality not rejected), False otherwise.
    """
    # Only the p-value matters here; the K^2 statistic is discarded.
    _, p_value = normaltest(data)
    # bool() keeps the return type a plain Python bool rather than numpy.bool_.
    return bool(p_value > alpha)


def check_anderson(data):
    """Run the Anderson-Darling test of ``data`` against a normal distribution.

    The test statistic is compared with the first three critical values
    reported by ``scipy.stats.anderson`` (the 15%, 10% and 5% significance
    levels). If the statistic is below any of them, H0 (normality) cannot be
    rejected at that level and the sample is treated as Gaussian.

    Args:
        data: Sample forwarded unchanged to ``scipy.stats.anderson``.

    Returns:
        True if the statistic is below at least one of the first three
        critical values, False otherwise.
    """
    result = anderson(data, dist="norm")
    # any() short-circuits on the first critical value the statistic stays
    # under, exactly like the previous next(generator, False) construct.
    return any(
        result.statistic < critical_value
        for critical_value in result.critical_values[:3]
    )


def is_normal(data):
    """Decide normality by majority vote over three statistical tests.

    All three checks (Shapiro-Wilk, D'Agostino K^2, Anderson-Darling) are
    always evaluated, each with its default settings.

    Args:
        data: Sample forwarded unchanged to each individual check.

    Returns:
        True if at least 2 of the 3 checks report normality, False otherwise.
    """
    verdicts = (
        check_shapiro(data),
        check_dagostino(data),
        check_anderson(data),
    )
    votes_for_normal = sum(map(bool, verdicts))
    return votes_for_normal >= 2

def analyze_normality(df):
    """Print and plot a per-column normality report for ``df``.

    Every column whose dtype kind is one of ``biufc`` (boolean, signed or
    unsigned integer, float, complex) is processed after dropping its
    missing values. For each such column this shows:

    - a distribution plot with kde and a fitted normal curve
    - a Q-Q plot
    - the verdict of the Shapiro-Wilk, D'Agostino and Anderson-Darling
      checks, printed in green (normal) or red (not normal)

    Args:
        df: Object exposing ``columns`` and column lookup by name
            (presumably a pandas DataFrame; confirm against callers).

    Returns:
        None. Output goes to stdout and to the active matplotlib backend.
    """
    normal_tag = Fore.GREEN + "NORMAL" + Fore.RESET
    not_normal_tag = Fore.RED + "NO NORMAL" + Fore.RESET

    numeric_columns = [
        name for name in df.columns if df[name].dtype.kind in 'biufc'
    ]
    for column in numeric_columns:
        data = df[column].dropna()
        print(f"{column}: {len(data)} puntos")

        # Left axis: histogram + kde + normal fit. Right axis: Q-Q plot.
        _, axes = plt.subplots(1, 2, figsize=(7, 5))
        left_ax, right_ax = axes
        # NOTE(review): distplot is deprecated in newer seaborn releases;
        # confirm the pinned seaborn version before upgrading.
        sns.distplot(data, fit=norm, ax=left_ax)
        qqplot(data, ax=right_ax, line="s")
        plt.show()

        # Each test runs right before its own line is printed, so a failing
        # test still leaves the earlier verdicts on screen.
        shapiro_tag = normal_tag if check_shapiro(data, 0.05) else not_normal_tag
        print(f"\tSHAPIRO:\t{shapiro_tag}")
        dagostino_tag = normal_tag if check_dagostino(data, 0.05) else not_normal_tag
        print(f"\tD'AGOSTINO:\t{dagostino_tag}")
        anderson_tag = normal_tag if check_anderson(data) else not_normal_tag
        print(f"\tANDERSON:\t{anderson_tag}")
        print("\n")
	from scipy.stats import shapiro, normaltest, anderson, norm
	from statsmodels.graphics.gofplots import qqplot
	import seaborn as sns
	from colorama import Fore
	import matplotlib.pyplot as plt
	import numpy as np

	# Configure seaborn (with color_codes enabled) before any plot is drawn.
	sns.set(color_codes=True)

	def check_shapiro(data, alpha=0.05):
	    """Run the Shapiro-Wilk normality test on ``data``.

	    H0 states that the sample was drawn from a normal distribution. When
	    the p-value exceeds ``alpha`` the test FAILS to reject H0, so the
	    sample is treated as Gaussian.

	    Args:
	        data: Sample forwarded unchanged to ``scipy.stats.shapiro``.
	        alpha: Significance level the p-value is compared against.

	    Returns:
	        True if ``p > alpha`` (normality not rejected), False otherwise.
	    """
	    # Only the p-value matters here; the W statistic is discarded.
	    _, p_value = shapiro(data)
	    # bool() keeps the return type a plain Python bool, not numpy.bool_.
	    return bool(p_value > alpha)


	def check_dagostino(data, alpha=0.05):
	    """Run D'Agostino's K^2 normality test on ``data``.

	    H0 states that the sample was drawn from a normal distribution. When
	    the p-value exceeds ``alpha`` the test FAILS to reject H0, so the
	    sample is treated as Gaussian.

	    Args:
	        data: Sample forwarded unchanged to ``scipy.stats.normaltest``.
	        alpha: Significance level the p-value is compared against.

	    Returns:
	        True if ``p > alpha`` (normality not rejected), False otherwise.
	    """
	    # Only the p-value matters here; the K^2 statistic is discarded.
	    _, p_value = normaltest(data)
	    # bool() keeps the return type a plain Python bool, not numpy.bool_.
	    return bool(p_value > alpha)


	def check_anderson(data):
	    """Run the Anderson-Darling test of ``data`` against a normal distribution.

	    The test statistic is compared with the first three critical values
	    reported by ``scipy.stats.anderson`` (the 15%, 10% and 5% significance
	    levels). If the statistic is below any of them, H0 (normality) cannot
	    be rejected at that level and the sample is treated as Gaussian.

	    Args:
	        data: Sample forwarded unchanged to ``scipy.stats.anderson``.

	    Returns:
	        True if the statistic is below at least one of the first three
	        critical values, False otherwise.
	    """
	    result = anderson(data, dist="norm")
	    # any() stops at the first critical value the statistic stays under.
	    return any(
	        result.statistic < critical_value
	        for critical_value in result.critical_values[:3]
	    )


	def is_normal(data):
	    """Decide normality by majority vote over three statistical tests.

	    All three checks (Shapiro-Wilk, D'Agostino K^2, Anderson-Darling) are
	    always evaluated, each with its default settings.

	    Args:
	        data: Sample forwarded unchanged to each individual check.

	    Returns:
	        True if at least 2 of the 3 checks report normality, False
	        otherwise.
	    """
	    verdicts = (
	        check_shapiro(data),
	        check_dagostino(data),
	        check_anderson(data),
	    )
	    votes_for_normal = sum(map(bool, verdicts))
	    return votes_for_normal >= 2

	def analyze_normality(df):
	    """Print and plot a per-column normality report for ``df``.

	    Every column whose dtype kind is one of ``biufc`` (boolean, signed or
	    unsigned integer, float, complex) is processed after dropping its
	    missing values. For each such column this shows:

	    - a distribution plot with kde and a fitted normal curve
	    - a Q-Q plot
	    - the verdict of the Shapiro-Wilk, D'Agostino and Anderson-Darling
	      checks, printed in green (normal) or red (not normal)

	    Args:
	        df: Object exposing ``columns`` and column lookup by name
	            (presumably a pandas DataFrame; confirm against callers).

	    Returns:
	        None. Output goes to stdout and to the active matplotlib backend.
	    """
	    si = Fore.GREEN + "NORMAL" + Fore.RESET
	    no = Fore.RED + "NO NORMAL" + Fore.RESET
	    for column in [i for i in df.columns if df[i].dtype.kind in 'biufc']:
	        data = df[column].dropna()
	        print(f"{column}: {len(data)} puntos")
	        # Left axis: histogram + kde + normal fit. Right axis: Q-Q plot.
	        fig, ax = plt.subplots(1, 2, figsize=(7, 5))
	        # NOTE(review): distplot is deprecated in newer seaborn releases;
	        # confirm the pinned seaborn version before upgrading.
	        sns.distplot(data, ax=ax[0], fit=norm)
	        qqplot(data, line="s", ax=ax[1])
	        plt.show()

	        print(f"\tSHAPIRO:\t{si if check_shapiro(data,0.05) else no}")
	        print(f"\tD'AGOSTINO:\t{si if check_dagostino(data,0.05) else no}")
	        print(f"\tANDERSON:\t{si if check_anderson(data) else no}")
	        print("\n")