# Iglewicz and Hoaglin outlier test (modified Z-score test)
# based on:
# * https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
# * http://colingorrie.github.io/outlier-detection.html#modified-z-score-method
# * http://contchart.com/outliers.aspx
# * https://stats.stackexchange.com/questions/339932/iglewicz-and-hoaglin-outlier-test-with-modified-z-scores-what-should-i-do-if-t
def calc_median(data):
    """Return the median of an already-sorted sequence of numbers.

    The function does not sort ``data`` itself; it simply picks the middle
    position(s), so callers must pass the values in ascending order.

    Args:
        data: Non-empty, sorted, indexable sequence of numbers.

    Returns:
        The middle element when the length is odd, otherwise the mean of the
        two middle elements as a float.

    Raises:
        ValueError: If ``data`` is empty.
    """
    length = len(data)
    if length == 0:
        raise ValueError('calc_median() requires at least one data point')

    # Floor division keeps the index an int; true division ("/") yields a
    # float on Python 3, which cannot be used to index a sequence.
    upper = length // 2
    if length % 2 == 0:
        return (data[upper - 1] + data[upper]) / 2.0
    return data[upper]
def calc_mad(data, median):
    """Return the median absolute deviation (MAD) of ``data`` around ``median``.

    Args:
        data: Iterable of numbers.
        median: The centre value the absolute deviations are measured from.

    Returns:
        The median of ``abs(x - median)`` over ``data``. When that median is
        exactly zero, the smallest positive normalised float is returned
        instead, so the result is never zero.
    """
    # calc_median is handed the deviations in ascending order.
    deviations = sorted(abs(value - median) for value in data)
    spread = calc_median(deviations)
    # 2.2250738585072014e-308 is sys.float_info.min; it stands in for a zero MAD.
    return 2.2250738585072014e-308 if spread == 0 else spread
def iglewicz_hoaglin(threshold, data):
    """Find outliers in ``data`` with the Iglewicz-Hoaglin modified Z-score.

    Every value ``x`` is scored as ``abs(0.6745 * (x - median) / MAD)`` and is
    reported as an outlier when its score is strictly greater than
    ``threshold``. The threshold, median, MAD, sorted data and each per-value
    score are printed to stdout as the function runs.

    Args:
        threshold: Score above which a value is treated as an outlier.
        data: Iterable of numbers. It is not modified; the function works on
            a sorted copy.

    Returns:
        A list of the values whose score exceeds ``threshold``, in ascending
        order.
    """
    # Sort a copy rather than calling data.sort(), so the caller's list keeps
    # its original order.
    ordered = sorted(data)
    median = calc_median(ordered)
    mad = calc_mad(ordered, median)
    result = []

    print('threshold:' + str(threshold))
    print('median:' + str(median))
    print('MAD:' + str(mad))
    print('data:' + str(ordered))
    print('')

    for x in ordered:
        score = abs(0.6745 * (x - median) / mad)
        print(str(x) + ':\t' + str(score))
        if score > threshold:
            result.append(x)
    return result
# Example run on a small sample using a cut-off of 3.5.
data = [10, 22, 30, 100, 15, 80, 8, 9]
threshold = 3.5
# Despite its name, this holds the outlier values returned by the test,
# not the scores themselves.
modified_z_scores = iglewicz_hoaglin(threshold, data)
print('\nresult: {}'.format(modified_z_scores))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment