Skip to content

Instantly share code, notes, and snippets.

@dataday
Last active July 10, 2017 11:17
Show Gist options
  • Save dataday/41e59df8ced67d86387e43a132999b3d to your computer and use it in GitHub Desktop.
Save dataday/41e59df8ced67d86387e43a132999b3d to your computer and use it in GitHub Desktop.
Calculate file sizes for selected system file types
import re
from collections import OrderedDict
# Matches one listing line of the form "<name>.<suffix> <size>b".
#   group 1 - file name (greedy, so the name itself may contain dots)
#   group 2 - file suffix
#   group 3 - file size in bytes, digits only
# Written as a raw string so the regex escapes (\w, \., \s, \d) reach the
# regex engine unchanged instead of being parsed as string escapes.
# No re.MULTILINE flag is passed: the pattern is applied to one line at a time.
line_pattern = re.compile(r'^([\w\W]+)\.([\w\d]+)\s(\d+)b$')

# File suffixes that are counted under each reporting category.
selected_file_types = {
    'music': ['mp3', 'aac', 'flac'],
    'images': ['jpg', 'bmp', 'gif'],
    'movies': ['mp4', 'avi', 'mkv'],
    'other': ['7z', 'txt', 'zip', 'exe'],
}
def get_line_match(line):
    """Match a single listing line against the module-level `line_pattern`.

    On success the returned match object exposes:
        Group 0 - File line
        Group 1 - File name
        Group 2 - File suffix
        Group 3 - File size in bytes (b)

    Returns the match object, or False when the line does not match.
    """
    # Run the regex only once. A match object is always truthy, so `or False`
    # only replaces the None produced by a failed match.
    return line_pattern.match(line) or False
def solution(S):
    """Total the file sizes in a listing, grouped by file-type category.

    S is a newline-separated listing in which each line is expected to look
    like "<name>.<suffix> <size>b". Lines that `get_line_match` rejects are
    ignored, as are files whose suffix is not listed in `selected_file_types`.

    Returns a string with one "<category> <total>b" line per category, in
    alphabetical category order, each line terminated by a newline.
    """
    # One running total per category. Keys come from selected_file_types so
    # the two can never drift apart; sorting keeps the output order stable.
    totals = OrderedDict(
        (category, 0) for category in sorted(selected_file_types)
    )

    for line in S.split('\n'):
        file_match = get_line_match(line)
        if not file_match:
            continue

        suffix = file_match.group(2)
        # group(3) is a string of digits; convert it before doing arithmetic.
        # (Comparing the raw string with an int raises TypeError on Python 3.)
        size = int(file_match.group(3))

        for category, suffixes in selected_file_types.items():
            if suffix in suffixes:
                totals[category] += size

    # Build the report in one pass instead of repeated string concatenation.
    return ''.join(
        '{0} {1}b\n'.format(category, total)
        for category, total in totals.items()
    )
if __name__ == '__main__':
    # Sample run over a small hard-coded listing when executed as a script.
    results = solution(
        'my.song.mp3 11b\ngreatSong.flac 1000b\nnot3.txt 5b\nvideo.mp4 200b\ngame.exe 100b\nmov!e.mkv 10000b'
    )
    # Parenthesised form is valid on both Python 2 (print statement with a
    # grouped expression) and Python 3 (print function).
    print(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment