Created
July 29, 2015 20:14
-
-
Save CptSpaceToaster/2761135039c6ec7fd38a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3.4 | |
import re | |
import time | |
def dictionary_has_full_lists(box):
    """Return True when every value stored in *box* is truthy.

    Intended for dictionaries whose values are lists: the result is False as
    soon as one key maps to an empty list (or any other falsy value).  An
    empty dictionary yields True because it contains no empty value.
    """
    # Index through the keys (rather than calling .values()) so any object
    # supporting iteration plus item lookup keeps working.
    return all(box[thing] for thing in box)
def scrub(data_stream, checkpoints, ok_if_missing=False, exit_early=False, size=1024, timeout=0):
    """Read a stream line by line and collect regex matches per checkpoint.

    Args:
        data_stream: Object with a ``readline(size)`` method that returns a
            falsy value (e.g. ``""``) at end of stream, such as an open file.
        checkpoints: Iterable of regular-expression patterns.  A pattern that
            is listed more than once is searched only once per line.
        ok_if_missing: When true, reaching end of stream while some
            checkpoint has no match is not an error.
        exit_early: When true, stop reading as soon as every checkpoint has
            at least one match.
        size: Limit passed to each ``readline`` call.  A line longer than
            this is delivered in pieces, and each piece is searched on its
            own.
        timeout: Number of seconds the scan may take; a value <= 0 disables
            the limit.  The deadline is only checked after a line has been
            read, so a ``readline`` call that blocks is not interrupted.

    Returns:
        A dict mapping every checkpoint to the list of matched texts
        (``group(0)`` of each match), in the order they were read.

    Raises:
        TimeoutError: The deadline passed before the scan finished.
        EOFError: End of stream was reached with at least one checkpoint
            unmatched and ``ok_if_missing`` is false.
    """
    # One result list per distinct checkpoint; compile each pattern once
    # instead of handing the raw pattern to re.search for every line.
    matches = {}
    searchers = []
    for re_chk in checkpoints:
        if re_chk not in matches:
            matches[re_chk] = []
            searchers.append((re_chk, re.compile(re_chk)))

    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    exit_time = time.monotonic() + timeout

    while True:
        line = data_stream.readline(size)
        if not line:
            # EOF
            if ok_if_missing or all(matches.values()):
                break
            raise EOFError("Reached EOF before all checkpoints were reached")

        for re_chk, pattern in searchers:
            obj = pattern.search(line)
            if obj:
                matches[re_chk].append(obj.group(0))

        if exit_early and all(matches.values()):
            break
        if timeout > 0 and time.monotonic() > exit_time:
            raise TimeoutError("Checkpoints could not be found within the given timeout: " + str(timeout))

    return matches
if __name__ == "__main__":
    # Ad-hoc smoke tests for scrub().  They read "file1.txt" from the current
    # working directory; judging by the assertions below it presumably holds
    # one line containing "text", one containing "word" and three containing
    # "this" -- verify against the actual fixture.  Test 4 reads /dev/zero,
    # so it only works on systems that provide that device.

    # Test 1: every checkpoint is present in the file.
    test_number = 1
    print("Test " + str(test_number))
    with open("file1.txt", "r") as f:
        out = scrub(f, [r'.*text.*', r'.*word.*'])
    assert len(out) == 2
    assert len(out['.*text.*']) == 1
    assert len(out['.*word.*']) == 1

    # Test 2: a checkpoint that never matches must raise EOFError.
    test_number += 1
    print("Test " + str(test_number))
    with open("file1.txt", "r") as f:
        try:
            scrub(f, [r'.*text.*', r'.*word.*', r'missing regex'])
        except EOFError as e:
            # TODO: Assert the exception in a real testing framework
            print(e)
        else:
            # Without this branch the test would pass silently when the
            # expected exception is never raised.
            raise AssertionError("scrub() did not raise EOFError")

    # Test 3: the same missing checkpoint is tolerated with ok_if_missing.
    test_number += 1
    print("Test " + str(test_number))
    with open("file1.txt", "r") as f:
        out = scrub(f, [r'.*text.*', r'.*word.*', r'missing regex'], ok_if_missing=True)
    assert len(out) == 3
    assert len(out['.*text.*']) == 1
    assert len(out['.*word.*']) == 1
    assert len(out['missing regex']) == 0

    # Test 4: an endless stream without a match must hit the timeout.
    test_number += 1
    print("Test " + str(test_number))
    with open("/dev/zero", "r") as f:
        try:
            scrub(f, [r'word'], size=64, timeout=1)
        except TimeoutError as e:
            # TODO: Assert the exception in a real testing framework
            print(e)
        else:
            raise AssertionError("scrub() did not raise TimeoutError")

    # Test 5: one checkpoint matching several lines collects every match.
    test_number += 1
    print("Test " + str(test_number))
    with open("file1.txt", "r") as f:
        out = scrub(f, [r'.*this.*'])
    assert len(out) == 1
    assert len(out['.*this.*']) == 3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment