Skip to content

Instantly share code, notes, and snippets.

@kelleyk
Created June 23, 2014 01:28
Show Gist options
  • Save kelleyk/6f5bffe11fb70d694488 to your computer and use it in GitHub Desktop.
Save kelleyk/6f5bffe11fb70d694488 to your computer and use it in GitHub Desktop.
from __future__ import absolute_import, print_function, division, unicode_literals
import re
from collections import defaultdict, Counter
# Default set of symbols that may begin a fragment; count_fragments() falls
# back to this when the caller does not supply ``initial_symbols``.
INITIAL_SYMBOLS = b'abc'
def count_fragments(lines, initial_symbols=None):
    """Count the fragments found in ``lines``, grouped by initial symbol.

    A fragment is one initial symbol followed by the (possibly empty) run of
    non-initial symbols after it.  With initial symbols ``abc`` the line
    ``axyzbyazyzz`` splits into ``axyz``, ``by`` and ``azyzz``.

    Args:
        lines: Iterable of lines to scan.  On Python 3 every line must be of
            the same string type (bytes or text) as ``initial_symbols``.
            Trailing CR/LF characters are removed before scanning and lines
            that are empty afterwards are skipped.
        initial_symbols: String whose characters may start a fragment.  When
            ``None`` or empty, ``INITIAL_SYMBOLS`` is used.

    Returns:
        A ``defaultdict`` mapping each initial symbol (a length-one string of
        the same type as the line) to a ``Counter`` of the fragments that
        start with it.

    Raises:
        AssertionError: If a non-blank line does not begin with one of the
            initial symbols.
    """
    if not initial_symbols:
        initial_symbols = INITIAL_SYMBOLS

    # Build the pattern in the same string type as the symbols: Python 3
    # refuses to concatenate text and bytes, and a bytes pattern can only be
    # applied to bytes input.
    escaped = re.escape(initial_symbols)
    if isinstance(escaped, bytes):
        fragment_re = re.compile(b'[' + escaped + b'][^' + escaped + b']*')
        line_endings = b'\r\n'
    else:
        fragment_re = re.compile('[' + escaped + '][^' + escaped + ']*')
        line_endings = '\r\n'

    fragments = defaultdict(Counter)
    for lineno, line in enumerate(lines, 1):
        # Lines read from a file keep their terminator; without stripping it
        # the last fragment of a line would be counted as e.g. ``ax\n``.
        line = line.rstrip(line_endings)
        if not line:
            continue
        if fragment_re.match(line) is None:
            raise AssertionError('Line {} does not match regex!'.format(lineno))
        # 're' only remembers the last capture of a repeated group, so the
        # line is tokenised with findall() instead.  Once the line is known
        # to start with an initial symbol, consecutive matches tile it
        # completely: every character either starts a new fragment or
        # extends the current one.
        for frag in fragment_re.findall(line):
            # Slice rather than index so the key is a string on Python 3
            # even when the fragment is bytes.
            fragments[frag[:1]][frag] += 1
    return fragments
def format_counts(fragments):
    """Print a per-initial-symbol report of fragment counts to stdout.

    Each initial symbol is printed on its own line, followed by one line per
    fragment showing its count and text.  Groups are listed in sorted order of
    their initial symbol; within a group, fragments are listed by descending
    count, with ties broken by descending fragment text.

    Args:
        fragments: Mapping of initial symbol to a mapping of fragment to
            count, as returned by ``count_fragments``.
    """
    def as_text(value):
        # Bytes would otherwise be printed as a b'...' repr on Python 3.
        if isinstance(value, bytes):
            return value.decode('utf-8', 'replace')
        return value

    # Sort the groups so the report does not depend on dict iteration order.
    for initial in sorted(fragments):
        print('{}'.format(as_text(initial)))
        ranked = sorted(
            ((qty, frag) for frag, qty in fragments[initial].items()),
            reverse=True,
        )
        for qty, frag in ranked:
            print(' {:>5} {}'.format(qty, as_text(frag)))
def main():
    """Command-line entry point: print fragment counts for a file.

    The path of the file to analyse is taken from the first command-line
    argument; any further arguments are ignored.  Exits with a usage message
    when no path is given.
    """
    import sys

    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else 'fragcount'
        sys.exit('usage: {} FILE'.format(prog))

    # The file is read in binary mode, so the lines handed to
    # count_fragments() are bytes; a file object already iterates by line.
    with open(sys.argv[1], 'rb') as f:
        format_counts(count_fragments(f))


if __name__ == '__main__':
    main()
from __future__ import absolute_import, print_function, division, unicode_literals
import pytest
from fragcount import count_fragments
@pytest.mark.parametrize(('lines', 'expected'), [
    # Keys are spelled as bytes literals: the input lines are bytes, and on
    # Python 3 the text keys produced by ``dict(a=...)`` never compare equal
    # to bytes.  On Python 2 the two spellings are identical.
    ([b'axyzbyazyzz'],
     {b'a': {b'axyz': 1, b'azyzz': 1}, b'b': {b'by': 1}}),
])
def test_count_fragments(lines, expected):
    """Check that count_fragments() groups and counts fragments as expected."""
    result = count_fragments(lines, initial_symbols=b'abc')
    # Reduce the defaultdict-of-Counter result to plain dicts for comparison.
    result_pod = {k: dict(v) for k, v in result.items()}
    assert result_pod == expected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment