# cnske/windowed_anomaly.py

## windowed_anomaly.py
def window_anomaly(df, col, window_length, qlow=0.05, qhigh=0.95):
    """
    Anomaly detection with a sliding-window approach.
    Partly based on: https://medium.com/@krzysztofdrelczuk/time-series-anomaly-detection-with-python-example-a92ef262f09a

    For every row position ``i`` the window covers the positions
    ``max(0, i - k)`` up to, but not including, ``min(N, i + k)``, where
    ``k = int(window_length / 2)`` and ``N`` is the number of rows. Windows are
    therefore shorter at both ends of the data. A value is flagged when it is
    below ``quantile(qlow) - mean`` or above ``quantile(qhigh) + mean`` of its
    window.

    Input
    df:             dataframe
    col:            column name as str
    window_length:  length of the window in steps of the data
    qlow:           quantile (between 0 and 1) used for the lower band
    qhigh:          quantile (between 0 and 1) used for the upper band

    Return
    lower:          lower band of the data (tuple, one value per row)
    upper:          upper band of the data (tuple, one value per row)
    anomaly:        anomalies as time-series marked with boolean
    percentage:     share of flagged rows in percent, rounded to 3 decimals
    """
    # Local import: no module-level numpy import is visible in this file.
    import numpy as np

    series = df[col]

    # Half window size (truncated to an integer) and total number of rows
    k = int(window_length / 2)
    n_rows = len(series)

    # Nothing to analyse: avoid unpacking an empty zip and dividing by zero
    if n_rows == 0:
        return (), (), series.astype(bool), 0.0

    def get_bands(window):
        """Return the (lower, upper) band of a single window."""
        mean = window.mean()
        return window.quantile(qlow) - mean, window.quantile(qhigh) + mean

    # Get the lower and upper bands of the data for each window.
    # .iloc slices by position, so this works for any index (dates, gaps,
    # non-zero-based integers), not only for the default 0..N-1 index.
    bands = []
    for i in range(n_rows):
        start = max(0, i - k)
        stop = max(start, min(n_rows, i + k))
        bands.append(get_bands(series.iloc[start:stop]))
    lower, upper = zip(*bands)  # split the list of pairs into two tuples

    # Mark anomalies with boolean; NaN bands compare as False
    anomaly = (series < np.asarray(lower)) | (series > np.asarray(upper))

    # Get percentage of anomalies relative to the data length
    percentage = np.round(anomaly.sum() / n_rows * 100, 3)

    return lower, upper, anomaly, percentage
	def window_anomaly(df, col, window_length, qlow=0.05, qhigh=0.95):
		"""
		Anomaly detection with a sliding-window approach.
		Partly based on: https://medium.com/@krzysztofdrelczuk/time-series-anomaly-detection-with-python-example-a92ef262f09a

		For every row position ``i`` the window covers the positions
		``max(0, i - k)`` up to, but not including, ``min(N, i + k)``, where
		``k = int(window_length / 2)`` and ``N`` is the number of rows. A value
		is flagged when it is below ``quantile(qlow) - mean`` or above
		``quantile(qhigh) + mean`` of its window.

		Input
		df: dataframe
		col: column name as str
		window_length: length of the window in steps of the data
		qlow: quantile (between 0 and 1) used for the lower band
		qhigh: quantile (between 0 and 1) used for the upper band

		Return
		lower: lower band of the data (tuple, one value per row)
		upper: upper band of the data (tuple, one value per row)
		anomaly: anomalies as time-series marked with boolean
		percentage: share of flagged rows in percent, rounded to 3 decimals
		"""
		# Local import: no module-level numpy import is visible in this file.
		import numpy as np

		series = df[col]

		# Half window size (truncated to an integer) and total number of rows
		k = int(window_length / 2)
		n_rows = len(series)

		# Nothing to analyse: avoid unpacking an empty zip and dividing by zero
		if n_rows == 0:
			return (), (), series.astype(bool), 0.0

		def get_bands(window):
			"""Return the (lower, upper) band of a single window."""
			mean = window.mean()
			return window.quantile(qlow) - mean, window.quantile(qhigh) + mean

		# Get the lower and upper bands of the data for each window.
		# .iloc slices by position, so any index type is supported.
		bands = []
		for i in range(n_rows):
			start = max(0, i - k)
			stop = max(start, min(n_rows, i + k))
			bands.append(get_bands(series.iloc[start:stop]))
		lower, upper = zip(*bands)  # split the list of pairs into two tuples

		# Mark anomalies with boolean; NaN bands compare as False
		anomaly = (series < np.asarray(lower)) | (series > np.asarray(upper))

		# Get percentage of anomalies relative to the data length
		percentage = np.round(anomaly.sum() / n_rows * 100, 3)

		return lower, upper, anomaly, percentage