Skip to content

Instantly share code, notes, and snippets.

@jamal919
Created November 15, 2021 11:46
Show Gist options
  • Save jamal919/f80ae4010dc866f41144482fa06eade8 to your computer and use it in GitHub Desktop.
Save jamal919/f80ae4010dc866f41144482fa06eade8 to your computer and use it in GitHub Desktop.
Outlier detection based on modified tau
# -*- coding: utf-8 -*-
"""
Outlier detection based on the modified Thompson tau test
author: khan
"""
import numpy as np
from scipy.stats import t
# Significance level shared by the reference table and the worked example.
alpha = 0.05  # two-sided test at the 95 percent confidence level


def _modified_thompson_tau(n, significance):
    """Return the critical t value and the modified Thompson tau for ``n``.

    Parameters
    ----------
    n : int or array of int
        Sample size(s); an array is evaluated element-wise.
    significance : float
        Two-sided significance level (e.g. 0.05).

    Returns
    -------
    tuple
        ``(t_crit, tau)`` where ``t_crit`` is the ``1 - significance/2``
        quantile of Student's t with ``n - 2`` degrees of freedom and
        ``tau = t_crit*(n-1) / (sqrt(n)*sqrt(n-2+t_crit**2))``.
    """
    t_crit = t.ppf(1 - significance / 2, n - 2)  # n-2 degrees of freedom
    tau_value = t_crit * (n - 1) / (np.sqrt(n) * np.sqrt(n - 2 + t_crit**2))
    return t_crit, tau_value


# tau table
# The critical t value varies with the sample size, so tabulate n = 3..20.
nsample = np.arange(3, 21)
# At this point `threshold` holds the tau coefficient itself (not yet scaled
# by a standard deviation).
t_alpha, threshold = _modified_thompson_tau(nsample, alpha)
print('nsample, t_alpha, threshold')
print(np.array([nsample, t_alpha, threshold]).T)

# Test
# Based on the example shown in https://www.statisticshowto.com/modified-thompson-tau-test/
samples = np.array([489, 490, 490, 491, 494, 499, 499, 500, 501, 505])
print(samples)

nsample = len(samples)
mu = np.mean(samples)  # dof is not considered
std = np.std(samples, ddof=1)  # sample standard deviation (n-1 in the divisor)
print(f'sample mean: {mu}, std: {std}')

# Only the two extremes can be the most deviant point, so inspect just those.
min_max = [np.min(samples), np.max(samples)]
imin_max = [np.argmin(samples), np.argmax(samples)]
print('min and max', min_max)
print('min and max location', imin_max)

# Absolute deviation of the minimum and of the maximum from the mean.
delta_min_max = np.abs(np.array(min_max) - mu)
print(delta_min_max)

# The candidate outlier is whichever extreme lies farther from the mean.
# `iselected_point` indexes into `min_max`/`imin_max` (0 = min, 1 = max),
# not directly into `samples`.
selected_point = np.max(delta_min_max)
iselected_point = np.argmax(delta_min_max)
print(f'index of selected point {iselected_point}, delta vaule {selected_point:.2f}, corresponding sample {samples[imin_max[iselected_point]]}')

# Rejection threshold for this sample: tau scaled by the sample std.
t_alpha, tau = _modified_thompson_tau(nsample, alpha)
print('tau', tau)
threshold = tau * std
print('threshold', threshold)

# A deviation below the threshold is accepted; anything else (including an
# exact tie) is reported through the else branch.
if selected_point < threshold:
    print(f'deviation of the selected point {selected_point:.3f} is smaller than threshold {threshold:.3f}. Not an outlier.')
else:
    print(f'deviation of the selected point {selected_point:.3f} is greater than threshold {threshold:.3f}. Outlier!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment