Last active
November 3, 2016 14:27
-
-
Save shapovalovei/1f587be3ca653960a6e3cf265678fc8d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Take a big log file and produce a "single narrative" in which no message text is repeated."""
import sys | |
import re | |
def parse_content(src):
    """Return the message part of a log line, without its timestamp.

    A timestamp is a parenthesised prefix followed by one space, for
    example ``(10:30:28) fred: hi``.  Only a prefix at the start of the
    line (optionally preceded by whitespace) is treated as a timestamp, so
    parentheses that occur inside the message itself are left untouched.

    Args:
        src: One log line as a string.

    Returns:
        The line with its leading ``(...) `` prefix removed, or ``src``
        unchanged when the line has no such prefix.
    """
    # Anchored, non-greedy-by-construction match: `[^)]*` cannot run past
    # the first closing parenthesis, so text such as "see (note) below"
    # later in the message is never mistaken for part of the timestamp.
    prefix = re.match(r'\s*\([^)]*\) ', src)
    if prefix is None:
        return src
    return src[prefix.end():]
def parse_timestamp(src):
    """Return the timestamp text of a log line, or None if it has none.

    The timestamp is the text between the parentheses of a parenthesised
    prefix at the start of the line (optionally preceded by whitespace),
    for example ``10:30:28`` in ``(10:30:28) fred: hi``.  Parenthesised
    text that appears later in the line is not a timestamp and is ignored.

    Args:
        src: One log line as a string.

    Returns:
        The text inside the leading parentheses, or ``None`` when the line
        does not start with a parenthesised prefix.
    """
    # `re.match` anchors at the start of the string and `[^)]*` stops at
    # the first ")" so parentheses inside the message are never captured.
    prefix = re.match(r'\s*\(([^)]*)\)', src)
    if prefix is None:
        return None
    return prefix.group(1)
def remove_repetition_from(lines):
    """Return the lines whose message text has not been seen before.

    Lines are compared by their message content (as returned by
    ``parse_content`` after trailing whitespace is stripped), so the same
    message with a different timestamp, or with no timestamp, counts as a
    repetition.  The first occurrence of each message is kept and the
    input order is preserved.

    Args:
        lines: An iterable of log lines (strings).

    Returns:
        A new list with the first line seen for each distinct message.
        The kept lines are the original, unstripped input items.
    """
    seen_messages = set()
    unique_lines = []
    for raw_line in lines:
        content = parse_content(raw_line.rstrip())
        if content in seen_messages:
            continue
        seen_messages.add(content)
        # Keep the line exactly as it was given, not the stripped copy.
        unique_lines.append(raw_line)
    return unique_lines
# Script entry point: print the de-duplicated narrative of the log file
# whose path is given as the first command-line argument.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print('Please input file path')
        sys.exit(1)
    file_name = sys.argv[1]
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("File path: " + file_name + "\n")
    with open(file_name) as f:
        # Iterate line by line instead of read().split('\n') so that a
        # file ending in a newline does not produce a phantom empty line.
        file_content = [line.rstrip('\n') for line in f]
    for message in remove_repetition_from(file_content):
        print(message)
from unittest import TestCase | |
class TestRemove_repetition_from(TestCase):
    """Unit tests for remove_repetition_from."""

    def test_empty_data(self):
        """Empty input yields an empty list."""
        result = remove_repetition_from('')
        self.assertEqual(result, [])

    def test_remove_repetition(self):
        """Messages repeated with or without timestamps are kept only once."""
        log_lines = [
            'fred: I said something',
            'bob: Oh I see',
            '(10:30:28) fred: I said something',
            '(10:30:29) bob: Oh I see',
            'fred: Ah, the text was repeated',
            'bob: yes it was',
            '(Wed 26 10:30:28) fred: I said something',
            '(Wed 26 10:30:29) bob: Oh I see',
            '(Wed 26 10:31:20) fred: Ah, the text was repeated',
            '(Wed 26 10:31:24) bob: yes it was',
        ]
        expected = [
            'fred: I said something',
            'bob: Oh I see',
            'fred: Ah, the text was repeated',
            'bob: yes it was',
        ]
        result = remove_repetition_from(log_lines)
        self.assertEqual(result, expected)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment