Last active
January 10, 2018 21:39
-
-
Save RyanMagnusson/10791a8da195353ed9c18c612b125878 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Union, Optional, List, Dict, Set | |
from enum import Enum | |
from os import walk | |
from os.path import realpath, join, isfile, isdir, basename, getsize | |
from hashlib import md5,sha256 | |
from argparse import ArgumentParser | |
from json import loads as unpickle, dumps as pickle | |
import sys | |
def is_blank(text: Optional[str] = None, cast: bool = True) -> bool:
    """Report whether *text* is ``None``, empty, or made up only of blanks.

    The characters treated as blank are space, tab, line feed, carriage
    return and the non-breaking space (U+00A0).

    Args:
        text: Value to inspect.
        cast: When true (the default), a non-string value is converted with
            ``str()`` before it is inspected. When false, any non-string
            value is reported as not blank.

    Returns:
        ``True`` if *text* is ``None`` or contains no non-blank character,
        ``False`` otherwise.
    """
    if text is None:
        return True
    # ``chr`` is a builtin function, not a type, so it must not be passed to
    # isinstance(); a single character is an ordinary ``str`` in Python 3.
    if not isinstance(text, str):
        if not cast:
            return False
        text = str(text)
    blanks = {' ', '\t', '\n', '\r', '\xa0'}
    return all(ch in blanks for ch in text)
def is_not_blank(text: Union[str, chr] = None) -> bool:
    """Return the logical opposite of ``is_blank(text)``."""
    if is_blank(text):
        return False
    return True
def trim_or_None_if_empty(text=None, strict: bool = False) -> Optional[str]:
    """Strip surrounding whitespace from *text*, mapping empty results to ``None``.

    Args:
        text: Value to trim. Non-string values are converted with ``str()``
            unless *strict* is set.
        strict: When true, reject any value that is not a ``str``.

    Returns:
        The stripped text, or ``None`` when *text* is ``None`` or nothing is
        left after stripping.

    Raises:
        ValueError: If *strict* is true and *text* is not a string.
    """
    if text is None:
        return None
    # ``chr`` is a builtin function, not a type; testing against it made
    # isinstance() raise TypeError before the intended ValueError was reached.
    if strict and not isinstance(text, str):
        try:
            shown = str(text)
        except Exception:
            # A broken __str__ must not mask the real error reported below.
            shown = ''
        suffix = ': ' + shown if shown else ''
        raise ValueError(
            'A string or unicode character must be provided. '
            'A {} was given instead{}'.format(type(text), suffix))
    trimmed = str(text).strip()
    return trimmed if trimmed else None
class Algorithm(Enum):
    """Checksum algorithms known to this script."""

    SHA256 = 'sha-256'
    MD5 = 'md5'

    @classmethod
    def values(cls) -> List['Algorithm']:
        """Return all members as a tuple, SHA256 first."""
        return (cls.SHA256, cls.MD5)

    @classmethod
    def from_string(cls, text: str):
        """Look up a member by name or value, ignoring case.

        A member matches when the trimmed, upper-cased *text* equals its
        name, its upper-cased value, or its upper-cased value with ``-``
        replaced by ``_``. Returns ``None`` for blank input or no match.
        """
        key = trim_or_None_if_empty(text)
        if key is None:
            return None
        key = key.upper()
        for member in cls.values():
            label = member.value.upper()
            if key in (member.name, label, label.replace('-', '_')):
                return member
        return None
class File(object):
    """A file path together with its size and computed checksums.

    Properties:
        path: Trimmed path, or ``None`` when the given path is blank.
        name: Final component of ``path`` (read-only).
        directory: Directory portion of ``path`` (read-only).
        size: Size in bytes; negative or ``None`` assignments become 0.

    ``checksums`` is a plain dict mapping an ``Algorithm`` to a hex digest.
    """

    def __init__(self, path: str, checksums: Optional[Dict[Algorithm, str]] = None):
        """Create a File for *path*.

        Args:
            path: Path of the file; surrounding whitespace is stripped.
            checksums: Optional mapping of already-known digests. An empty
                or missing mapping is replaced by a fresh dict so instances
                never share state.
        """
        self.path = path
        self.checksums = checksums if checksums else {}
        self.size = 0

    @property
    def path(self):
        """Trimmed path of the file, or ``None`` when blank."""
        return self.__path

    @path.setter
    def path(self, value: str = None):
        # A property setter's return value is discarded, so nothing is returned.
        self.__path = trim_or_None_if_empty(value)

    @property
    def name(self):
        """Base name of ``path``, or ``None`` when no path is set."""
        if not self.path:
            return None
        return basename(self.path)

    @property
    def directory(self):
        """Directory portion of ``path``, or ``None`` when no path is set."""
        if not self.path:
            return None
        # Imported locally because ``dirname`` is not among the module imports.
        from os.path import dirname
        return dirname(self.path)

    @property
    def size(self):
        """Size in bytes (never negative)."""
        return self.__size

    @size.setter
    def size(self, value: int = None):
        self.__size = 0 if value is None or value < 0 else value

    def __getstate__(self):
        """Return the serialisable state: the class name and the path."""
        return {'py/object': self.__class__.__name__, 'path': self.path}

    def __str__(self):
        """Return the state as JSON text, falling back to the bare path.

        ``pickle`` here is this module's alias for ``json.dumps``. It cannot
        serialise the instance itself, so the state dict is dumped instead.
        """
        try:
            return pickle(self.__getstate__())
        except (TypeError, ValueError):
            # __str__ must always return a string, even without a path.
            return self.path or ''
class Column(Enum):
    """Output columns that can appear in a formatted line."""

    PATH = 'Path'
    DIRECTORY = 'Directory'
    NAME = 'Name'
    SHA256 = 'SHA-256'
    MD5 = 'MD5'
    SIZE = "Size"

    @classmethod
    def values(cls) -> List['Column']:
        """Return all members as a tuple in the order NAME, SHA256, MD5, PATH, DIRECTORY, SIZE."""
        return (cls.NAME, cls.SHA256, cls.MD5, cls.PATH, cls.DIRECTORY, cls.SIZE)

    @classmethod
    def from_string(cls, text: str):
        """Look up a member by name or value, ignoring case.

        The trimmed, upper-cased *text* is compared with each member's name,
        its upper-cased value, and that value with ``-`` replaced by ``_``.
        Returns ``None`` for blank input or when nothing matches.
        """
        wanted = trim_or_None_if_empty(text)
        if wanted is None:
            return None
        wanted = wanted.upper()

        def matches(column):
            upper_value = column.value.upper()
            return wanted in (column.name, upper_value, upper_value.replace('-', '_'))

        return next((column for column in cls.values() if matches(column)), None)
def calculate_checksum(file: str, algorithm: Algorithm, chunk_size: int = 4096):
    """Return the hexadecimal digest of a file's contents.

    The file is read in binary mode, ``chunk_size`` bytes at a time, so it
    is never loaded into memory as a whole.

    Args:
        file: Path of the file to read.
        algorithm: ``Algorithm.MD5`` or ``Algorithm.SHA256``.
        chunk_size: Number of bytes requested per read; must be positive.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If *algorithm* is not supported or *chunk_size* is
            smaller than 1.
    """
    # Validate before touching the file so a bad argument never opens it.
    if algorithm is Algorithm.MD5:
        digest = md5()
    elif algorithm is Algorithm.SHA256:
        digest = sha256()
    else:
        raise ValueError('Unsupported algorithm: ' + str(algorithm))
    if chunk_size < 1:
        # read(0) returns b"" at once, which would hash nothing.
        raise ValueError('chunk_size must be positive: ' + str(chunk_size))
    with open(file, "rb") as stream:
        # iter() with a sentinel stops on the first empty read, i.e. at EOF.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
# Command-line interface. Parsing happens at module level because the script
# body further down reads ``args`` directly.
arg_parser = ArgumentParser(description='Calculate md5 or sha256 of file(s)')
arg_parser.add_argument('file', help='file to hash, or directory to walk')
arg_parser.add_argument('--sha256', action='store_true', default=False,
                        help='compute the SHA-256 checksum')
arg_parser.add_argument('--md5', action='store_true', default=False,
                        help='compute the MD5 checksum (used when no algorithm is given)')
args = arg_parser.parse_args()
# A store_true flag that defaults to True can never be switched off, so the
# MD5 default is applied here instead: only when no algorithm was requested.
if not (args.md5 or args.sha256):
    args.md5 = True
def format_files(files: List[File] = None, columns: List[Column] = None, delimiter: str = '|') -> str:
    """Print one delimited line per file and return the printed text.

    Args:
        files: Files to print; ``None`` is treated as an empty list.
        columns: Columns to emit, in order. When ``None`` or empty, the
            columns are detected from the data: PATH (or NAME when a file
            has no path), then SHA256 and MD5 if any file carries them.
        delimiter: Text placed between columns.

    Returns:
        The printed lines joined with newlines (an empty string when there
        are no files).
    """
    if files is None:
        files = []
    if not columns:
        present = set()
        for entry in files:
            if is_not_blank(entry.path):
                present.add(Column.PATH)
            elif is_not_blank(entry.name):
                present.add(Column.NAME)
            if entry.checksums:
                if entry.checksums.get(Algorithm.SHA256) is not None:
                    present.add(Column.SHA256)
                if entry.checksums.get(Algorithm.MD5) is not None:
                    present.add(Column.MD5)
        # Set iteration order depends on string hashing, which is randomised
        # per process; a fixed sequence keeps column order stable across runs.
        ordering = (Column.PATH, Column.NAME, Column.SHA256, Column.MD5)
        columns = [column for column in ordering if column in present]
    lines = [format_file(entry, columns, delimiter) for entry in files]
    for line in lines:
        print(line, file=sys.stdout)
    return '\n'.join(lines)
def format_file(file: File = None, columns: List[Column] = [Column.PATH, Column.SHA256, Column.MD5, Column.SIZE], delimiter: str = '|') -> str:
    """Render one file as a single delimited line.

    Args:
        file: File to render; ``None`` yields an empty string.
        columns: Columns to emit, in order. ``None`` or an empty list yields
            an empty string. The default list is only read, never mutated.
        delimiter: Text placed between columns.

    Returns:
        The column values joined by *delimiter*. A missing value becomes an
        empty field, so every line has the same number of fields.

    Raises:
        ValueError: If *columns* contains something that is not a known
            ``Column`` member.
    """
    if file is None or not columns:
        return ''
    # Imported locally because ``dirname`` is not among the module imports.
    from os.path import dirname

    checksums = file.checksums or {}
    fields = []
    for col in columns:
        if col is Column.PATH:
            value = file.path
        elif col is Column.DIRECTORY:
            # Not every File object exposes ``directory``; derive it from
            # the path when the attribute is absent.
            value = getattr(file, 'directory', None)
            if value is None and file.path:
                value = dirname(file.path)
        elif col is Column.NAME:
            value = file.name
        elif col is Column.SHA256:
            value = checksums.get(Algorithm.SHA256)
        elif col is Column.MD5:
            value = checksums.get(Algorithm.MD5)
        elif col is Column.SIZE:
            # Chosen before appending: the former one-line conditional bound
            # looser than ``+`` and threw away every preceding column.
            value = '0' if file.size is None else str(file.size)
        else:
            raise ValueError('Unsupported column for formatting')
        fields.append('' if value is None else value)
    # join() keeps the delimiter even when a leading field is empty.
    return delimiter.join(fields)
def _describe_file(path, with_md5, with_sha256):
    """Build a File for *path* with the requested checksums and its size."""
    entry = File(realpath(path))
    if with_md5:
        entry.checksums[Algorithm.MD5] = calculate_checksum(entry.path, Algorithm.MD5)
    if with_sha256:
        entry.checksums[Algorithm.SHA256] = calculate_checksum(entry.path, Algorithm.SHA256)
    entry.size = getsize(path)
    return entry


file_paths = []  # File objects for everything found, in discovery order.
if isfile(args.file):
    # A single file gets the same treatment (size included) as walked files.
    file_paths.append(_describe_file(args.file, args.md5, args.sha256))
elif isdir(args.file):
    # Walk the tree.
    for root, _directories, filenames in walk(args.file):
        for filename in filenames:
            # Join the two strings in order to form the full filepath.
            file_paths.append(_describe_file(join(root, filename), args.md5, args.sha256))
else:
    # Report a bad path instead of silently printing nothing.
    sys.exit('No such file or directory: {}'.format(args.file))
format_files(file_paths)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment