Skip to content

Instantly share code, notes, and snippets.

@kentatogashi
Created July 3, 2018 07:52
Show Gist options
  • Save kentatogashi/4a1dff6dc492d2a6df9c12856142c0b1 to your computer and use it in GitHub Desktop.
Save kentatogashi/4a1dff6dc492d2a6df9c12856142c0b1 to your computer and use it in GitHub Desktop.
gesd.py
import numpy as np
from scipy import stats
# Module-level sample of 10 standard-normal draws, created at import time.
# Neither function below reads this global (make_datasets and gesd each bind
# their own `data`), and the __main__ block rebinds the name before use.
data = np.random.randn(10)
def make_datasets(n=1000, r=3):
    """Build a synthetic sample containing exactly ``r`` planted outliers.

    The base sample is ``n`` half-normal values (``|N(0, 1)| * 10``). ``r``
    distinct positions are then shifted upward by 100 so they stand far away
    from the bulk of the data.

    Args:
        n: Number of observations to generate.
        r: Number of outliers to plant. Values <= 0 plant nothing.

    Returns:
        1-D ``numpy.ndarray`` of length ``n``.

    Raises:
        ValueError: If ``r`` is larger than ``n`` (cannot pick ``r`` distinct
            positions).
    """
    data = np.abs(np.random.randn(n) * 10)
    if r <= 0:
        return data
    if r > n:
        raise ValueError(
            "cannot plant r=%d distinct outliers in n=%d points" % (r, n)
        )
    # Sample positions WITHOUT replacement: independent randint draws can hit
    # the same index twice, which yields fewer than r outliers (one of them
    # shifted by 200 instead of 100).
    outlier_positions = np.random.choice(n, size=r, replace=False)
    data[outlier_positions] += 100
    return data
def gesd(data, r, alpha=0.005):
    """Detect outliers with the generalized ESD (Rosner) test.

    At each step k = 1..r the observation farthest from the mean of the
    *remaining* sample is removed, its test statistic

        R_k = max|x - mean| / std          (sample std, ddof=1)

    is compared against the critical value

        lambda_k = (n-k) * t / sqrt((n-k-1 + t**2) * (n-k+1)),
        t = t.ppf(1 - alpha / (2*(n-k+1)), df=n-k-1).

    The number of outliers is the largest k with R_k > lambda_k, which makes
    the test robust to masking (a step may fail while a later step succeeds).

    Args:
        data: 1-D array-like of numeric observations. It is not modified.
        r: Upper bound on the number of outliers to look for. It is capped at
            ``n - 2`` so that the t distribution keeps >= 1 degree of freedom.
        alpha: Significance level of the test.

    Returns:
        Tuple ``(outlier_ind, outlier_val)``: integer indices into ``data``
        and the corresponding values, in order of removal. Both are empty
        when no outlier is found. Each detected value is also printed, one
        per line.
    """
    values = np.asarray(data, dtype=float).ravel()
    n = values.size
    max_outliers = max(0, min(int(r), n - 2))

    # Work on copies so the caller's array is untouched; remaining_ind maps
    # positions in the shrinking sample back to indices of the original data.
    remaining_val = values.copy()
    remaining_ind = np.arange(n)

    outlier_ind = np.zeros(max_outliers, dtype=int)
    outlier_val = np.zeros(max_outliers)
    m = 0  # number of outliers = largest k with R_k > lambda_k

    for i in range(max_outliers):
        k = i + 1  # the GESD formulas are written for 1-based steps
        std = remaining_val.std(ddof=1)
        if not std > 0:
            # Zero (or NaN) spread: the statistic is undefined, stop here.
            break

        z = np.abs(remaining_val - remaining_val.mean()) / std
        max_pos = int(z.argmax())
        R_k = z[max_pos]

        # Record the candidate BEFORE deleting it from the working sample.
        outlier_ind[i] = remaining_ind[max_pos]
        outlier_val[i] = remaining_val[max_pos]
        remaining_val = np.delete(remaining_val, max_pos)
        remaining_ind = np.delete(remaining_ind, max_pos)

        p = 1 - alpha / (2 * (n - k + 1))
        t_pv = stats.t.ppf(p, n - k - 1)
        lambda_k = ((n - k) * t_pv) / np.sqrt(
            (n - k - 1 + t_pv ** 2) * (n - k + 1)
        )

        if R_k > lambda_k:
            m = k

    outlier_ind = outlier_ind[:m]
    outlier_val = outlier_val[:m]
    for value in outlier_val:
        print(value)
    return outlier_ind, outlier_val
if __name__ == "__main__":
    # Script entry point: generate the default synthetic sample, then run the
    # GESD test on it, looking for at most five outliers.
    data = make_datasets()
    gesd(data=data, r=5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment