Skip to content

Instantly share code, notes, and snippets.

@Szuuuken
Last active April 28, 2018 16:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Szuuuken/1c9fc3ab9d08d6a12bad5b929cbb5a46 to your computer and use it in GitHub Desktop.
Save Szuuuken/1c9fc3ab9d08d6a12bad5b929cbb5a46 to your computer and use it in GitHub Desktop.
Iglewicz and Hoaglin outlier test (modified Z-score test)
# based on:
# * https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
# * http://colingorrie.github.io/outlier-detection.html#modified-z-score-method
# * http://contchart.com/outliers.aspx
# * https://stats.stackexchange.com/questions/339932/iglewicz-and-hoaglin-outlier-test-with-modified-z-scores-what-should-i-do-if-t
def calc_median(data):
    """Return the median of an already-sorted sequence of numbers.

    The function does not sort ``data`` itself; callers must pass the
    values in ascending order.

    Args:
        data: Indexable, sized sequence of numbers in sorted order.

    Returns:
        The middle element for an odd number of values, or the mean of
        the two middle elements (always a float) for an even number.

    Raises:
        ValueError: If ``data`` is empty.
    """
    length = len(data)
    if length == 0:
        raise ValueError("calc_median() requires at least one value")

    # Floor division keeps the index an int on Python 3; true division
    # would yield a float, which is not a valid sequence index.
    mid = length // 2
    if length % 2 == 0:
        return (data[mid - 1] + data[mid]) / 2.0
    return data[mid]
def calc_mad(data, median):
    """Return the median absolute deviation of ``data`` around ``median``.

    Args:
        data: Iterable of numbers.
        median: The centre value the deviations are measured from.

    Returns:
        The median of the absolute deviations ``|x - median|``. When that
        value is zero, the smallest positive normal float is returned
        instead so that callers can divide by the result.
    """
    # calc_median expects sorted input, so order the deviations first.
    deviations = sorted(abs(value - median) for value in data)
    mad = calc_median(deviations)

    # 2.2250738585072014e-308 is sys.float_info.min; it stands in for a
    # zero MAD to avoid a division by zero in the modified Z-score.
    return mad if mad != 0 else 2.2250738585072014e-308
def iglewicz_hoaglin(threshold, data):
    """Find outliers using the Iglewicz-Hoaglin modified Z-score test.

    For each value ``x`` the score ``|0.6745 * (x - median) / MAD|`` is
    computed; values whose score exceeds ``threshold`` are reported.
    The threshold, median, MAD, sorted data and every per-value score
    are printed to stdout as a side effect.

    Args:
        threshold: Score above which a value is treated as an outlier.
        data: Iterable of numbers. It is not modified.

    Returns:
        A list of the outlying values, in ascending order.
    """
    # Work on a sorted copy so the caller's sequence keeps its order.
    ordered = sorted(data)
    median = calc_median(ordered)
    mad = calc_mad(ordered, median)

    print('threshold:' + str(threshold))
    print('median:' + str(median))
    print('MAD:' + str(mad))
    print('data:' + str(ordered))
    print('')

    result = []
    for x in ordered:
        score = abs(0.6745 * (x - median) / mad)
        print(str(x) + ':\t' + str(score))
        if score > threshold:
            result.append(x)
    return result
# Demo run: flag the outliers in a small sample using a cut-off of 3.5.
threshold = 3.5
data = [10, 22, 30, 100, 15, 80, 8, 9]

# Despite its name, this holds the values flagged as outliers (the
# function's return value), not the scores themselves.
modified_z_scores = iglewicz_hoaglin(threshold=threshold, data=data)
print('\nresult: ' + str(modified_z_scores))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment