Skip to content

Instantly share code, notes, and snippets.

@TomFaulkner
Last active July 15, 2017 23:12
Show Gist options
  • Save TomFaulkner/d36002269dbe6f8f9b9eaff429c76de1 to your computer and use it in GitHub Desktop.
Save TomFaulkner/d36002269dbe6f8f9b9eaff429c76de1 to your computer and use it in GitHub Desktop.
Include and exclude filter for exact matches or regex matches
"""
Filter a string using an optional exact-match whitelist (include),
an optional regex whitelist (include_regex),
an exact-match blacklist (exclude), and a regex blacklist (exclude_regex).
See the unit tests below for usage examples.
Code is from https://github.com/TomFaulkner/pypihole/blob/master/pypihole/helpers/filtering.py;
check there for updates.
"""
import re
def ie_filter(entry: str, **kwargs) -> bool:
    """
    Decide whether ``entry`` passes the include/exclude filters.

    Include filters work as a whitelist and exclude filters as a blacklist.
    If any include filter is supplied, ``entry`` must match at least one of
    them (exactly, or via regex). If any exclude filter is supplied,
    ``entry`` must match none of them. Both kinds can be combined; an entry
    that is whitelisted and also blacklisted is excluded. With no filters at
    all, every entry passes.

    :param entry: any string, intended for Query fields
    :return: boolean, whether to include or not

    kwargs:
    :param include: list of items to match and include, must be exact match
    :param exclude: list of items to exclude, must be exact match
    :param include_regex: list of regex strings to match and include
    :param exclude_regex: list of regex strings to match and exclude

    Each filter may also be ``None`` (treated as empty) or a single string
    (treated as a one-item list). Regexes are applied with ``re.search``, so
    a pattern may match anywhere inside ``entry``. Unrecognised keyword
    arguments are ignored.
    """
    def as_list(value):
        # A bare string would otherwise be substring-tested by ``in`` (or
        # iterated character by character as regexes); wrap it so that it
        # behaves as a single filter item.
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return value

    def matches_any(patterns):
        return any(re.search(pattern, entry) for pattern in patterns)

    include = as_list(kwargs.get('include'))
    exclude = as_list(kwargs.get('exclude'))
    include_regex = as_list(kwargs.get('include_regex'))
    exclude_regex = as_list(kwargs.get('exclude_regex'))

    # The whitelist only restricts the result when at least one include
    # filter was actually supplied.
    has_whitelist = bool(include or include_regex)
    passes_include = (
        not has_whitelist
        or entry in include
        or matches_any(include_regex)
    )

    # The blacklist is evaluated on its own, so an ``exclude_regex`` given
    # without an ``exclude`` list is honoured as well.
    passes_exclude = not (entry in exclude or matches_any(exclude_regex))

    return passes_include and passes_exclude
##############
# Unit Tests #
##############
import unittest
from filtering import ie_filter
class Test_ie_filter(unittest.TestCase):
    """
    Unit tests for ie_filter covering each filter kind on its own and the
    include/exclude combinations.

    Filters are always passed as lists, which is the documented contract.
    """

    def setUp(self):
        self.filter_test_string = 'Pihole blocks the ads, pypihole helps ' \
                                  'to analyze Pihole'

    def test_ie_filter_no_filter(self):
        # no filters at all: everything passes
        res = ie_filter(self.filter_test_string)
        self.assertTrue(res)

    def test_ie_filter_include(self):
        # one include, match
        res = ie_filter(self.filter_test_string,
                        include=[self.filter_test_string])
        self.assertTrue(res)

        # one include, not a match
        res = ie_filter(self.filter_test_string,
                        include=['not going to happen'])
        self.assertFalse(res)

        # two includes, first will hit
        res = ie_filter(self.filter_test_string,
                        include=[self.filter_test_string,
                                 'pick me, pick me'])
        self.assertTrue(res)

    def test_ie_filter_exclude(self):
        # exclude match
        res = ie_filter(self.filter_test_string,
                        exclude=[self.filter_test_string])
        self.assertFalse(res)

        # exclude doesn't match
        res = ie_filter(self.filter_test_string,
                        exclude=['something else'])
        self.assertTrue(res)

        # two excludes, second matches
        res = ie_filter(self.filter_test_string,
                        exclude=['something else',
                                 self.filter_test_string])
        self.assertFalse(res)

    def test_ie_filter_include_and_exclude(self):
        # whitelisted and blacklisted at the same time: exclusion wins
        res = ie_filter(self.filter_test_string,
                        include=[self.filter_test_string],
                        exclude=[self.filter_test_string])
        self.assertFalse(res)

        # whitelisted, and the blacklist doesn't match
        res = ie_filter(self.filter_test_string,
                        include=[self.filter_test_string],
                        exclude=['something else'])
        self.assertTrue(res)

        # not whitelisted, and the blacklist doesn't match either
        res = ie_filter(self.filter_test_string,
                        include=['not going to happen'],
                        exclude=['something else'])
        self.assertFalse(res)

    def test_ie_filter_include_regex(self):
        res = ie_filter(
            self.filter_test_string,
            include_regex=[r'hole'])
        self.assertTrue(res)

        # two matches
        res = ie_filter(
            self.filter_test_string,
            include_regex=[r'hole', 'blocks'])
        self.assertTrue(res)

        # what happens if first string isn't a match?
        res = ie_filter(
            self.filter_test_string,
            include_regex=['nah bro', 'hole', 'blocks'])
        self.assertTrue(res)

    def test_ie_filter_exclude_regex(self):
        res = ie_filter(
            self.filter_test_string,
            exclude_regex=[r'hole'])
        self.assertFalse(res)

        # two matches
        res = ie_filter(
            self.filter_test_string,
            exclude_regex=[r'hole', 'blocks'])
        self.assertFalse(res)

        # what happens if first string isn't a match?
        res = ie_filter(
            self.filter_test_string,
            exclude_regex=['nah bro', 'hole', 'blocks'])
        self.assertFalse(res)

    def test_ie_filter_include_regex_and_exclude_regex(self):
        res = ie_filter(
            self.filter_test_string,
            include_regex=['hole'],
            exclude_regex=['pi'])
        self.assertFalse(res)

        # two matches
        res = ie_filter(
            self.filter_test_string,
            include_regex=['hole'],
            exclude_regex=['pi', 'blocks'])
        self.assertFalse(res)

        # what happens if first string isn't a match?
        res = ie_filter(
            self.filter_test_string,
            include_regex=['pi'],
            exclude_regex=['nah bro', 'hole', 'blocks'])
        self.assertFalse(res)

    def test_ie_filter_include_and_include_regex(self):
        res = ie_filter(
            self.filter_test_string,
            include=[self.filter_test_string],
            include_regex=['hole'])
        self.assertTrue(res)

    def test_ie_filter_include_and_exclude_regex(self):
        res = ie_filter(
            self.filter_test_string,
            include=[self.filter_test_string],
            exclude_regex=['hole'])
        self.assertFalse(res)

        res = ie_filter(
            self.filter_test_string,
            include=[self.filter_test_string],
            exclude_regex=['nah bro'])
        self.assertTrue(res)

    def test_ie_filter_exclude_and_include_regex(self):
        res = ie_filter(
            self.filter_test_string,
            exclude=[self.filter_test_string],
            include_regex=['hole'])
        self.assertFalse(res)

        res = ie_filter(
            self.filter_test_string,
            exclude=['google.com'],
            include_regex=['hole'])
        self.assertTrue(res)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment