Skip to content

Instantly share code, notes, and snippets.

@RyanMagnusson
Last active January 10, 2018 21:39
Show Gist options
  • Save RyanMagnusson/10791a8da195353ed9c18c612b125878 to your computer and use it in GitHub Desktop.
Save RyanMagnusson/10791a8da195353ed9c18c612b125878 to your computer and use it in GitHub Desktop.
from typing import Union, Optional, List, Dict, Set
from enum import Enum
from os import walk
from os.path import realpath, join, isfile, isdir, basename, getsize
from hashlib import md5,sha256
from argparse import ArgumentParser
from json import loads as unpickle, dumps as pickle
import sys
def is_blank(text:Union[str,chr] = None, cast:bool = True) -> bool:
    """Report whether ``text`` is ``None``, empty, or made only of blank characters.

    The characters treated as blank are space, tab, line feed, carriage
    return and the non-breaking space (code point 160).

    Args:
        text: The value to inspect.
        cast: When true (the default) a non-string value is converted with
            ``str()`` before it is inspected; when false a non-string value
            is reported as not blank.

    Returns:
        ``True`` if ``text`` is ``None`` or contains no non-blank character,
        otherwise ``False``.
    """
    if text is None:
        return True
    # ``chr`` is a builtin function, not a type, so it must never be passed to
    # isinstance(); a single character is an ordinary ``str`` anyway.
    if not isinstance(text, str):
        if not cast:
            return False
        text = str(text)
    blank_chars = ' \t\n\r' + chr(160)
    return all(c in blank_chars for c in text)
def is_not_blank(text:Union[str,chr] = None) -> bool:
    """Return the logical opposite of ``is_blank(text)``."""
    if is_blank(text):
        return False
    return True
def trim_or_None_if_empty(text = None, strict:bool = False) -> Optional[str]:
    """Strip surrounding whitespace from ``text``; give ``None`` if nothing is left.

    Args:
        text: The value to trim. Non-string values are converted with
            ``str()`` unless ``strict`` is set.
        strict: When true, anything other than a ``str`` is rejected.

    Returns:
        The stripped text, or ``None`` when ``text`` is ``None`` or strips
        down to the empty string.

    Raises:
        ValueError: If ``strict`` is true and ``text`` is not a ``str``.
    """
    if text is None:
        return None
    # ``chr`` is a function, not a type; testing against ``str`` alone avoids
    # the TypeError that isinstance() raises for a non-type classinfo.
    if strict and not isinstance(text, str):
        try:
            shown = str(text)
        except Exception:
            # Best effort only: the value is shown purely to aid debugging.
            shown = ''
        suffix = ': ' + shown if shown else ''
        raise ValueError('A string or unicode character must be provided. A {} was given instead{}'.format(type(text), suffix))
    trimmed = str(text).strip()
    return trimmed or None
class Algorithm(Enum):
    """Checksum algorithms known to this script."""

    SHA256 = 'sha-256'
    MD5 = 'md5'

    @classmethod
    def values(cls) -> List['Algorithm']:
        """Return every member, SHA256 first and MD5 second."""
        return (cls.SHA256, cls.MD5)

    @classmethod
    def from_string(cls, text: str):
        """Look a member up by name or value, ignoring case.

        The trimmed, upper-cased text is compared with each member's name,
        its value, and its value with ``-`` replaced by ``_``. ``None`` is
        returned for blank input or when nothing matches.
        """
        cleaned = trim_or_None_if_empty(text)
        if cleaned is None:
            return None
        wanted = cleaned.upper()
        for member in cls.values():
            label = member.value.upper()
            if wanted in (member.name, label, label.replace('-', '_')):
                return member
        return None
class File(object):
    """A file path together with its checksums and size in bytes.

    ``path`` is stored trimmed (blank input becomes ``None``), ``checksums``
    maps an ``Algorithm`` to its hex digest, and ``size`` is never negative.
    """

    def __init__(self, path:str, checksums:Optional[Dict['Algorithm',str]] = None):
        """Create a record for ``path`` with optional precomputed checksums."""
        self.path = path
        # Always hold a dict so callers can assign ``checksums[...]`` directly.
        self.checksums = checksums or {}
        self.size = 0

    @property
    def path(self):
        """The trimmed path, or ``None`` when no usable path was given."""
        return self.__path

    @path.setter
    def path(self, value:str = None) -> None:
        self.__path = trim_or_None_if_empty(value)

    @property
    def name(self):
        """The final component of ``path``, or ``None`` without a path."""
        if self.path:
            return basename(self.path)
        return None

    @property
    def directory(self):
        """The directory part of ``path``, or ``None`` without a path."""
        # Imported locally so the module's import block does not have to change.
        from os.path import dirname
        if self.path:
            return dirname(self.path)
        return None

    @property
    def size(self):
        """The size in bytes; zero until it has been set."""
        return self.__size

    @size.setter
    def size(self, bytes:int = None):
        # Missing or negative sizes are normalised to zero.
        self.__size = 0 if bytes is None or bytes < 0 else bytes

    def __getstate__(self):
        """Return the serialisable state: a class-name tag plus the path."""
        data = {'py/object': self.__class__.__name__ }
        data['path'] = self.path
        return data

    def __str__(self):
        """Return the state as JSON, falling back to the bare path.

        The state dict is serialised rather than ``self``: ``json.dumps``
        cannot encode a ``File`` instance, so encoding ``self`` always failed.
        """
        try:
            return pickle(self.__getstate__())
        except (TypeError, ValueError):
            # __str__ must return a string even when there is no path.
            return self.path or ''
class Column(Enum):
    """Output columns that can be rendered for a file."""

    PATH = 'Path'
    DIRECTORY = 'Directory'
    NAME = 'Name'
    SHA256 = 'SHA-256'
    MD5 = 'MD5'
    SIZE = "Size"

    @classmethod
    def values(cls) -> List['Column']:
        """Return every member in the order used for lookups."""
        return (cls.NAME, cls.SHA256, cls.MD5, cls.PATH, cls.DIRECTORY, cls.SIZE)

    @classmethod
    def from_string(cls, text: str):
        """Look a member up by name or value, ignoring case.

        The trimmed, upper-cased text is compared with each member's name,
        its value, and its value with ``-`` replaced by ``_``. ``None`` is
        returned for blank input or when nothing matches.
        """
        cleaned = trim_or_None_if_empty(text)
        if cleaned is None:
            return None
        wanted = cleaned.upper()
        for member in cls.values():
            label = member.value.upper()
            if wanted in (member.name, label, label.replace('-', '_')):
                return member
        return None
def calculate_checksum(file:str, algorithm:Algorithm):
    """Return the hex digest of the file at ``file`` for ``algorithm``.

    The algorithm is validated before the file is opened, and the file is
    read in binary mode in 4096-byte pieces.

    Raises:
        ValueError: If ``algorithm`` is neither ``Algorithm.MD5`` nor
            ``Algorithm.SHA256``.
    """
    if algorithm is not Algorithm.MD5 and algorithm is not Algorithm.SHA256:
        raise ValueError('Unsuported algorithm: ' + str(algorithm))
    digest = md5() if algorithm is Algorithm.MD5 else sha256()
    with open(file, "rb") as stream:
        while True:
            block = stream.read(4096)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()
# Command-line interface: one positional path plus optional algorithm switches.
arg_parser = ArgumentParser(description='Calculate md5 or sha256 of file(s)')
arg_parser.add_argument('file', help='a file, or a directory to walk recursively')
arg_parser.add_argument('--sha256', action='store_true', default=False,
                        help='compute the SHA-256 checksum')
# The default has to be False: a store_true flag that defaults to True can
# never be switched off, which made MD5 unconditional.
arg_parser.add_argument('--md5', action='store_true', default=False,
                        help='compute the MD5 checksum (used when no algorithm is given)')
args = arg_parser.parse_args()
# Keep MD5 as the behaviour when no algorithm is requested explicitly.
if not (args.md5 or args.sha256):
    args.md5 = True
def format_files(files:List[File] = None, columns:List[Column] = None, delimiter:chr = '|') -> None:
    """Print one delimited line per file to standard output.

    Args:
        files: The files to print; ``None`` is treated as no files.
        columns: The columns to render. When ``None`` or empty, the columns
            are inferred from the data: the path (or the name when there is
            no path) plus every checksum that at least one file carries.
        delimiter: The text placed between columns.

    Returns:
        Nothing; the lines are written with ``print``.
    """
    # Materialise once: the files are traversed twice when columns are inferred.
    files = [] if files is None else list(files)
    selected = columns
    if not columns:
        present = set()
        for f in files:
            if is_not_blank(f.path):
                present.add(Column.PATH)
            elif is_not_blank(f.name):
                present.add(Column.NAME)
            checksums = f.checksums or {}
            if checksums.get(Algorithm.SHA256) is not None:
                present.add(Column.SHA256)
            if checksums.get(Algorithm.MD5) is not None:
                present.add(Column.MD5)
        # Emit in a fixed order: iterating the set directly would make the
        # column order vary from run to run.
        preferred = (Column.PATH, Column.NAME, Column.SHA256, Column.MD5)
        selected = [col for col in preferred if col in present]
    for f in files:
        print(format_file(f, selected, delimiter), file=sys.stdout)
def format_file(file:File = None, columns:List[Column] = [Column.PATH, Column.SHA256, Column.MD5, Column.SIZE], delimiter:chr = '|') -> str:
    """Render ``file`` as one line of ``delimiter``-separated column values.

    Args:
        file: The file to render.
        columns: The columns to emit, in output order.
        delimiter: The text placed between adjacent columns.

    Returns:
        The formatted line, or ``''`` when ``file`` is ``None`` or no columns
        are given. A missing value becomes an empty cell, so every line has
        the same number of delimiters.

    Raises:
        ValueError: If ``columns`` contains something that is not a
            supported ``Column`` member.
    """
    # Imported locally so the module's import block does not have to change.
    from os.path import dirname

    if file is None or not columns:
        return ''
    checksums = file.checksums or {}
    cells = []
    for col in columns:
        if col is Column.PATH:
            cell = file.path
        elif col is Column.DIRECTORY:
            # Not every file object exposes ``directory``; derive it from the
            # path when the attribute is missing.
            cell = getattr(file, 'directory', None)
            if cell is None and file.path:
                cell = dirname(file.path)
        elif col is Column.NAME:
            cell = file.name
        elif col is Column.SHA256:
            cell = checksums.get(Algorithm.SHA256)
        elif col is Column.MD5:
            cell = checksums.get(Algorithm.MD5)
        elif col is Column.SIZE:
            # Computed as a separate cell: appending it inline with a
            # conditional expression replaced the whole line with the size.
            cell = '0' if file.size is None else str(file.size)
        else:
            raise ValueError('Unsupported column for formatting')
        cells.append('' if cell is None else cell)
    # Joining keeps the delimiter even in front of empty leading cells.
    return delimiter.join(cells)
def _describe_file(path):
    """Build a File for ``path`` with the requested checksums and its size."""
    described = File(realpath(path))
    if args.md5:
        described.checksums[Algorithm.MD5] = calculate_checksum(described.path, Algorithm.MD5)
    if args.sha256:
        described.checksums[Algorithm.SHA256] = calculate_checksum(described.path, Algorithm.SHA256)
    described.size = getsize(path)
    return described


file_paths = []  # File records (not bare paths) for everything that was hashed.
if isfile(args.file):
    # A single file goes through the same helper, so its size is recorded too.
    file_paths.append(_describe_file(args.file))
else:
    # Walk the tree.
    for root, directories, files in walk(args.file):
        for filename in files:
            # Join the two strings in order to form the full filepath.
            file_paths.append(_describe_file(join(root, filename)))
format_files(file_paths)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment