Skip to content

Instantly share code, notes, and snippets.

@shapovalovei
Last active November 3, 2016 14:27
Show Gist options
  • Save shapovalovei/1f587be3ca653960a6e3cf265678fc8d to your computer and use it in GitHub Desktop.
Save shapovalovei/1f587be3ca653960a6e3cf265678fc8d to your computer and use it in GitHub Desktop.
""" This program that could take a big log file and make a "single narrative" where text is not repeated """
import sys
import re
def parse_content(src):
    """Return the message part of a log line, without its timestamp prefix.

    A line may begin with a parenthesised timestamp followed by one space,
    for example ``(Wed 26 10:30:28) fred: hello``; that prefix (and any
    whitespace in front of it) is dropped.  A line with no such prefix is
    returned unchanged.

    Args:
        src: One log line as a string.

    Returns:
        The line with its leading ``(timestamp) `` prefix removed.
    """
    # re.match anchors at the start of the line and [^)]* stops at the first
    # closing parenthesis, so parentheses that occur inside the message text
    # are never mistaken for (part of) the timestamp.
    prefix = re.match(r'\s*\([^)]*\) ', src)
    if prefix is None:
        return src
    return src[prefix.end():]
def parse_timestamp(src):
    """Return the timestamp text of a log line, or None if it has none.

    The timestamp is the text between the parentheses that open the line,
    for example ``Wed 26 10:30:28`` in ``(Wed 26 10:30:28) fred: hello``.
    Leading whitespace before the opening parenthesis is tolerated.

    Args:
        src: One log line as a string.

    Returns:
        The text inside the leading parentheses, or None when the line does
        not start with a parenthesised group.
    """
    # Anchored at the start and limited to the first ")" so that parentheses
    # appearing later in the message are not reported as a timestamp.
    prefix = re.match(r'\s*\(([^)]*)\)', src)
    if prefix is None:
        return None
    return prefix.group(1)
def remove_repetition_from(lines):
    """Return the lines whose message has not been seen earlier, in order.

    Two lines count as repeats of each other when ``parse_content`` returns
    the same value for their right-stripped text.  Only the first line of
    each such group is kept, and it is returned exactly as it was passed in
    (not stripped).

    Args:
        lines: An iterable of log-line strings.

    Returns:
        A new list with the first occurrence of every distinct message.
    """
    # A set is enough here: only membership of the message text matters.
    seen_messages = set()
    unique_lines = []
    for raw_line in lines:
        content = parse_content(raw_line.rstrip())
        if content in seen_messages:
            continue
        seen_messages.add(content)
        unique_lines.append(raw_line)
    return unique_lines
if __name__ == "__main__":
    # Command-line entry point: read the log file named by the first
    # argument and print each line whose message has not appeared before.
    #
    # Every print call takes a single parenthesised argument, which produces
    # the same output under Python 2 and is also valid under Python 3.
    if len(sys.argv) < 2:
        print('Please input file path')
        sys.exit(1)
    fileName = sys.argv[1]
    print("File path: " + fileName + "\n")
    with open(fileName) as f:
        file_content = f.read().split('\n')
    for message in remove_repetition_from(file_content):
        print(message)
from unittest import TestCase
class TestRemove_repetition_from(TestCase):
    """Unit tests for remove_repetition_from."""

    def test_empty_data(self):
        # An empty input must produce an empty list.
        result = remove_repetition_from('')
        self.assertEqual(result, [])

    def test_remove_repetition(self):
        # Every message first appears without a timestamp and is then
        # repeated with one; only the first occurrences should be returned.
        log_lines = [
            'fred: I said something',
            'bob: Oh I see',
            '(10:30:28) fred: I said something',
            '(10:30:29) bob: Oh I see',
            'fred: Ah, the text was repeated',
            'bob: yes it was',
            '(Wed 26 10:30:28) fred: I said something',
            '(Wed 26 10:30:29) bob: Oh I see',
            '(Wed 26 10:31:20) fred: Ah, the text was repeated',
            '(Wed 26 10:31:24) bob: yes it was',
        ]
        expected = [
            'fred: I said something',
            'bob: Oh I see',
            'fred: Ah, the text was repeated',
            'bob: yes it was',
        ]
        self.assertEqual(remove_repetition_from(log_lines), expected)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment