Last active
April 23, 2018 16:40
-
-
Save rolandog/50f1ae4e1900b62922f34b3ae278d451 to your computer and use it in GitHub Desktop.
Creates a SHA-256 digest of files in a directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Purpose | |
======= | |
Creates a SHA-256 digest of files in a directory | |
Attributions | |
============ | |
Based on an answer by Richard Neumann on Code Review | |
https://codereview.stackexchange.com/a/147191 | |
Based on statement that file digests are considered best-practice as of 2012 | |
https://en.wikipedia.org/wiki/File_verification#File_formats | |
Creation and Modification Times | |
=============================== | |
Created on Fri Apr 20 12:07:41 2018 | |
Last Modified on Mon Apr 23 09:50:21 2018 | |
License | |
======= | |
License: Creative Commons CC0 | |
License URL: https://creativecommons.org/publicdomain/zero/1.0/legalcode | |
Changelog | |
========= | |
* 0.0.2 | |
- Added option to request individual files instead of digest | |
- Don't output hash of currently running script | |
- Added changelog | |
- Added last modified date | |
- Added example usage by developer and end-user | |
- Added Creative Commons CC0 License | |
* 0.0.1 | |
- Initial release. | |
@author: rolandog | |
@version: 0.0.2 | |
""" | |
from os import getcwd, listdir | |
from os.path import join, isfile, basename | |
from time import strftime | |
from hashlib import sha256 | |
from textwrap import dedent | |
def list_files(basedir=None):
    """Yield ``(path, name)`` pairs for the regular files directly in *basedir*.

    Args:
        basedir: Directory to scan.  Defaults to the current working
            directory when ``None``.

    Yields:
        Tuples ``(path, name)`` where ``name`` is the directory entry and
        ``path`` is ``join(basedir, name)``.  Entries are produced in sorted
        name order so that output built from them is reproducible.

    The following entries are skipped:

    * names starting with ``sha256-digest`` (combined digest files),
    * names ending in ``.sha256`` (per-file hash files),
    * the running script itself (exact name match),
    * anything that is not a regular file, e.g. sub-directories.
    """
    if basedir is None:
        basedir = getcwd()

    # ``__file__`` is not defined in every context (e.g. an interactive
    # session); fall back to an empty name, which can never match an entry.
    script_name = basename(globals().get("__file__", ""))

    for item in sorted(listdir(basedir)):
        # Match hash files by prefix/suffix and the script by its exact name.
        # Plain substring tests would silently leave unrelated files such as
        # "notes.sha256.txt" or "<script>.bak" out of the digest.
        if (item.startswith("sha256-digest")
                or item.endswith(".sha256")
                or item == script_name):
            continue
        path = join(basedir, item)
        if isfile(path):
            yield (path, item)
def sha256sum(file_name, block_size=None):
    """Return the hexadecimal SHA-256 checksum of a file.

    The file is opened in binary mode and fed to the hash in chunks, so it
    is never loaded into memory as a whole.

    Args:
        file_name: Path of the file to hash.
        block_size: Number of bytes requested per read.  ``None`` or ``0``
            selects the default of 4096 bytes.

    Returns:
        The checksum as a string of hexadecimal digits.
    """
    # A block size of 0 would make ``read`` return ``b""`` immediately and
    # produce the checksum of an empty file, so treat it like ``None``.
    if not block_size:
        block_size = 4096

    checksum = sha256()
    with open(file_name, "rb") as file_handle:
        # iter(callable, sentinel) keeps reading until ``read`` returns the
        # empty bytes object, i.e. until end of file.
        for block in iter(lambda: file_handle.read(block_size), b""):
            checksum.update(block)
    return checksum.hexdigest()
def sha256sums(basedir=None, block_size=None):
    """Generate one ``(<sha256sum>, <file_name>)`` tuple per listed file.

    The files are those produced by ``list_files(basedir=basedir)``; each
    one is hashed with ``sha256sum`` using the given ``block_size``.
    """
    for entry in list_files(basedir=basedir):
        path, name = entry
        digest = sha256sum(path, block_size=block_size)
        yield (digest, name)
def create_sha256_digest(basedir=None,
                         block_size=None,
                         outputdir=None,
                         individual=False):
    """Write SHA-256 checksums for the files found in *basedir*.

    Every output line has the form ``<hexdigest> *<file_name>``.

    Args:
        basedir: Directory whose files are hashed; forwarded to
            ``sha256sums``.
        block_size: Read size in bytes; forwarded to ``sha256sums``.
        outputdir: Directory that receives the output file(s).  Defaults to
            the current working directory when ``None``.
        individual: When false (the default), all lines go into a single
            file named ``sha256-digest_<YYYYmmdd-HHMMSS>``.  When true, one
            ``<file_name>.sha256`` file is written per hashed file.
    """
    if outputdir is None:
        outputdir = getcwd()

    # Test truthiness rather than ``individual is False``: the identity test
    # sent falsy values such as 0 or None down the per-file branch.
    if individual:
        for checksum, file_name in sha256sums(basedir, block_size):
            hash_file_path = join(outputdir, file_name + ".sha256")
            with open(hash_file_path, "w") as file_handle:
                file_handle.write(" *".join((checksum, file_name)) + "\n")
        return

    # Single combined digest; the timestamp in its name is taken once,
    # before any file is hashed.
    hash_file_name = strftime("sha256-digest_%Y%m%d-%H%M%S")
    hash_file_path = join(outputdir, hash_file_name)
    with open(hash_file_path, "w") as file_handle:
        for file_hash in sha256sums(basedir, block_size):
            file_handle.write(" *".join(file_hash) + "\n")
if __name__ == "__main__":
    # Command-line entry point: parse the options and write the digest.
    from argparse import ArgumentParser, RawDescriptionHelpFormatter

    DESCRIPTION = "Creates a SHA-256 digest of files in a directory"
    # Shown verbatim after the option list; dedent() strips the common
    # leading indentation before it is handed to argparse.
    EPILOG = """\
    example usage by developer
    --------------------------
    ls
    hello-world.txt sha256digest.py
    cat hello-world.txt
    Hello, World!
    python sha256digest.py -i
    ls
    hello-world.txt hello-world.txt.sha256 sha256digest.py
    cat hello-world.txt.sha256
    c98c24b677eff44860afea6f493bbaec5bb1c4cbb209c6fc2bbb47f66ff2ad31 *hello-world.txt
    example usage by end-user
    -------------------------
    ls
    hello-world.txt hello-world.txt.sha256
    sha256sum -c hello-world.txt.sha256
    hello-world.txt: OK
    """
    # RawDescriptionHelpFormatter keeps the epilog's line breaks intact.
    PARSER = ArgumentParser(prog="sha256digest.py",
                            description=DESCRIPTION,
                            formatter_class=RawDescriptionHelpFormatter,
                            epilog=dedent(EPILOG))
    PARSER.add_argument("-i",
                        "--individual",
                        default=False,
                        action="store_true",
                        required=False,
                        help="outputs one hash file per file in folder")
    PARSER.add_argument("-d",
                        "--directory",
                        default=None,
                        type=str,
                        required=False,
                        help="path to the folder containing the files")
    PARSER.add_argument("-b",
                        "--blocksize",
                        default=None,
                        type=int,
                        required=False,
                        help="read files in chunks less than BLOCKSIZE bytes")
    PARSER.add_argument("-o",
                        "--outputdir",
                        default=None,
                        type=str,
                        required=False,
                        help="output directory for sha256 digest or files")
    ARGUMENTS = PARSER.parse_args()

    # A read of 0 bytes returns no data, so a zero block size would hash
    # every file as if it were empty; reject non-positive sizes up front
    # with a usage error instead of producing a wrong digest or a traceback.
    if ARGUMENTS.blocksize is not None and ARGUMENTS.blocksize < 1:
        PARSER.error("BLOCKSIZE must be a positive integer")

    create_sha256_digest(basedir=ARGUMENTS.directory,
                         block_size=ARGUMENTS.blocksize,
                         outputdir=ARGUMENTS.outputdir,
                         individual=ARGUMENTS.individual)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment