Skip to content

Instantly share code, notes, and snippets.

@ryandeivert
Last active July 18, 2019 22:12
Show Gist options
  • Save ryandeivert/a3f2d738ed8c5f1c03725744aefae709 to your computer and use it in GitHub Desktop.
Save ryandeivert/a3f2d738ed8c5f1c03725744aefae709 to your computer and use it in GitHub Desktop.
very contrived example of caching matcher
from datetime import datetime, timedelta

from stream_alert.shared.rule import rule

import boto3
from botocore.exceptions import ClientError


class CachingMatcher(object):
    """Base class that memoizes a value on the class for a limited time.

    Subclasses supply the value by overriding ``update_value``; callers read
    it through ``value``, which only recomputes once the stored value is
    older than the requested number of minutes (or has never been set).
    """

    # Timestamp of the most recent refresh. Starting at the epoch makes the
    # very first call to value() look stale.
    _LAST_REFRESH = datetime(year=1970, month=1, day=1)
    # Most recently computed value; None means nothing usable is stored.
    CACHED_VALUE = None

    @classmethod
    def value(cls, minutes=10):
        """Return the stored value, recomputing it first when it is stale.

        Args:
            minutes: Maximum age, in minutes, of the stored value before
                ``update_value`` is called again.

        Returns:
            Whatever ``update_value`` returned on the latest refresh.
        """
        current_time = datetime.utcnow()
        expires_at = cls._LAST_REFRESH + timedelta(minutes=minutes)

        # Serve the stored value only if there is one and it has not expired
        still_fresh = cls.CACHED_VALUE is not None and expires_at >= current_time
        if still_fresh:
            return cls.CACHED_VALUE

        # Stale or missing: recompute, then stamp the refresh with the time
        # captured at the start of this call
        cls.CACHED_VALUE = cls.update_value()
        cls._LAST_REFRESH = current_time
        return cls.CACHED_VALUE

    @classmethod
    def update_value(cls):
        """Compute a fresh value to store; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError('implement the update_value classmethod on subclass')


class AccountIDsMatcher(CachingMatcher):
    """Matcher backed by a cached set of AWS Organizations account IDs.

    The set is built by ``update_value`` and served through the inherited
    ``value`` classmethod.
    """

    @classmethod
    def update_value(cls):
        """Collect the ``Id`` of every account returned by ``list_accounts``.

        Calls the Organizations ``list_accounts`` API repeatedly, passing the
        ``NextToken`` from each response into the next request, until a
        response carries no token.

        Returns:
            set: The account IDs gathered. If a ``ClientError`` is raised
                part-way through, the error is printed and the IDs collected
                before the failure (possibly none) are returned.
        """
        account_ids = set()
        client = boto3.client('organizations')

        request_args = {}
        while True:
            try:
                response = client.list_accounts(**request_args)
            except ClientError as err:
                # Best effort: report the failure and keep what we have so far.
                # NOTE: the caller (value) stores this possibly-partial set
                # until the next refresh.
                # The parenthesized form parses on both Python 2 and Python 3.
                print(err)
                break

            account_ids.update(account['Id'] for account in response['Accounts'])

            next_token = response.get('NextToken')
            if not next_token:
                # No further pages
                break
            request_args['NextToken'] = next_token

        return account_ids

    @classmethod
    def unknown_account(cls, rec):
        """Return True when the record's ``account`` is not a cached account ID.

        Args:
            rec: Record exposing ``get``; its ``'account'`` entry is checked.

        Returns:
            bool: True if ``rec.get('account')`` is absent from the cached set.
        """
        account_id = rec.get('account')
        # refresh every 60 minutes vs default of every 10
        return account_id not in cls.value(minutes=60)

###
# Basic rule using matcher directly
###
@rule(
    logs=['cloudwatch:cloudtrail'],
    matchers=[AccountIDsMatcher.unknown_account],
)
def is_unknown_account_id(rec):
    # All of the filtering is delegated to the unknown_account matcher passed
    # to the decorator, so the rule body itself unconditionally returns True.
    # NOTE(review): another rule in this file is defined with the same
    # function name - confirm both are not meant to be loaded together.
    return True

###
# Basic rule using cached value in rule
###
@rule(
    logs=['cloudwatch:cloudtrail'],
)
def is_unknown_account_id(rec):
    # Same check as a matcher would do, performed inline: look the record's
    # account up in the cached ID set, refreshed at most every 60 minutes.
    # NOTE(review): another rule in this file is defined with the same
    # function name - confirm both are not meant to be loaded together.
    account_id = rec.get('account')
    known_account_ids = AccountIDsMatcher.value(minutes=60)
    return account_id not in known_account_ids
@Ryxias
Copy link

Ryxias commented Jul 18, 2019

I would shy away from the terminology of "Caching" if we're intending to use this as a configuration store.

We could keep with the existing naming convention and call it something like StreamConfig.

I would DEFINITELY avoid making an HTTP call to the outside world in the middle of a rules execution. A better approach is to have the rule pull from a central store, and to have a system separate from StreamAlert's pipeline run the above logic and periodically update that store. This reduces the chance of cache inconsistency issues or stampeding.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment