# liquidgenius/low_pass_filter.py

## low_pass_filter.py
def low_pass_filter_anomaly_detection(df: pd.DataFrame,
                                      column_name: str,
                                      rolling_window: int,
                                      number_of_stdevs_away_from_mean: float) -> pd.DataFrame:
    """
    Flag anomalies in a time series with a centered rolling mean/standard-deviation band.

    Three columns are added to ``df`` in place (existing columns with the same names are
    overwritten), and the same dataframe object is returned:

    * ``<column_name>_Rolling_Average``: centered rolling mean of ``column_name``.
    * ``<column_name>_Rolling_StDev``: centered rolling standard deviation of ``column_name``.
    * ``<column_name>_Low_Pass_Filter_Anomaly``: True where the observation lies more than
      ``number_of_stdevs_away_from_mean`` rolling standard deviations from the rolling mean.

    Positions where the rolling statistics are NaN (for example near the edges, where the
    centered window is incomplete) compare as False and are therefore never flagged.

    :param df: pd.DataFrame: Dataframe holding the time series; modified in place.
    :param column_name: str: The name of the column to perform anomaly detection on.
    :param rolling_window: int: The number of observations composing the rolling window.
    :param number_of_stdevs_away_from_mean: float: Distance from the rolling mean, in rolling
        standard deviations, beyond which a point is flagged. For example, with a value of 2
        every point more than 2 standard deviations away from the mean is flagged.
    :return: pd.DataFrame: ``df`` itself, with the three columns described above added.
    :raises KeyError: If ``column_name`` is not a column of ``df``.
    """
    average_column = f'{column_name}_Rolling_Average'
    stdev_column = f'{column_name}_Rolling_StDev'
    anomaly_column = f'{column_name}_Low_Pass_Filter_Anomaly'

    # Select the *column* with df[...]; df.loc[label] would look the name up in the row index.
    observations = df[column_name]
    window = observations.rolling(window=rolling_window, center=True)

    # N-observation rolling average and standard deviation
    df[average_column] = window.mean()
    df[stdev_column] = window.std()

    # A point is anomalous when its absolute distance from the rolling mean exceeds the
    # allowed number of rolling standard deviations.
    distance_from_mean = (observations - df[average_column]).abs()
    df[anomaly_column] = distance_from_mean > (number_of_stdevs_away_from_mean * df[stdev_column])

    return df
	def low_pass_filter_anomaly_detection(df: pd.DataFrame,
			column_name: str,
			rolling_window: int,
			number_of_stdevs_away_from_mean: float) -> pd.DataFrame:
		"""
		Detect anomalies in a time series by comparing each point to a centered rolling band.

		The rolling mean and rolling standard deviation of ``column_name`` are computed over
		``rolling_window`` observations (window centered on each point). A point is flagged
		when it is more than ``number_of_stdevs_away_from_mean`` rolling standard deviations
		away from the rolling mean. Results are written to new columns of ``df`` (in place):
		``<column_name>_Rolling_Average``, ``<column_name>_Rolling_StDev`` and
		``<column_name>_Low_Pass_Filter_Anomaly`` (True/False).

		Points whose rolling statistics are NaN (e.g. incomplete windows at the edges)
		compare as False and are not flagged.

		:param df: pd.DataFrame: Dataframe holding the time series; modified in place.
		:param column_name: str: The name of the column to perform anomaly detection on.
		:param rolling_window: int: The number of observations composing the rolling window.
		:param number_of_stdevs_away_from_mean: float: Distance from mean where anomaly
			detection should begin, e.g. 2 flags points more than 2 standard deviations away.
		:return: pd.DataFrame: The same dataframe, with the three result columns added.
		:raises KeyError: If ``column_name`` is not a column of ``df``.
		"""
		# Column access must use df[...]; df.loc[name] addresses rows by index label.
		values = df[column_name]
		roller = values.rolling(window=rolling_window, center=True)

		# N-observation rolling average
		df[f'{column_name}_Rolling_Average'] = roller.mean()

		# N-observation rolling standard deviation
		df[f'{column_name}_Rolling_StDev'] = roller.std()

		# Flag points further from the rolling mean than the allowed number of
		# rolling standard deviations.
		deviation = (values - df[f'{column_name}_Rolling_Average']).abs()
		threshold = number_of_stdevs_away_from_mean * df[f'{column_name}_Rolling_StDev']
		df[f'{column_name}_Low_Pass_Filter_Anomaly'] = deviation > threshold

		return df