Skip to content

Instantly share code, notes, and snippets.

@rolandog
Last active April 23, 2018 16:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rolandog/50f1ae4e1900b62922f34b3ae278d451 to your computer and use it in GitHub Desktop.
Save rolandog/50f1ae4e1900b62922f34b3ae278d451 to your computer and use it in GitHub Desktop.
Creates a SHA-256 digest of files in a directory
# -*- coding: utf-8 -*-
"""
Purpose
=======
Creates a SHA-256 digest of files in a directory
Attributions
============
Based on an answer by Richard Neumann on Code Review
https://codereview.stackexchange.com/a/147191
Based on the statement that file digests are considered best practice as of 2012
https://en.wikipedia.org/wiki/File_verification#File_formats
Creation and Modification Times
===============================
Created on Fri Apr 20 12:07:41 2018
Last Modified on Mon Apr 23 09:50:21 2018
License
=======
License: Creative Commons CC0
License URL: https://creativecommons.org/publicdomain/zero/1.0/legalcode
Changelog
=========
* 0.0.2
- Added option to request individual files instead of digest
- Don't output hash of currently running script
- Added changelog
- Added last modified date
- Added example usage by developer and end-user
- Added Creative Commons CC0 License
* 0.0.1
- Initial release.
@author: rolandog
@version: 0.0.2
"""
from os import getcwd, listdir
from os.path import join, isfile, basename
from time import strftime
from hashlib import sha256
from textwrap import dedent
def list_files(basedir=None):
    """List only the files located directly inside ``basedir``.

    Yields ``(path, name)`` tuples, where ``path`` is ``basedir`` joined
    with the entry name and ``name`` is the bare entry name.  Entries are
    visited in sorted name order, so repeated runs over the same directory
    yield the files in the same order.

    The following entries are skipped:

    * names containing ``"sha256-digest"`` or ``".sha256"`` (hash files);
    * names containing the base name of this script file;
    * anything for which ``isfile`` is false (e.g. sub-directories).

    :param basedir: directory to scan; the current working directory is
        used when it is ``None``.
    """
    if basedir is None:
        basedir = getcwd()
    # Name of the running script, so that it is left out of its own digest.
    script_name = basename(__file__)
    # listdir() returns entries in arbitrary order; sort them so the
    # resulting digest is reproducible.
    for item in sorted(listdir(basedir)):
        # Don't make a hash of a hash file or of the current script.
        if "sha256-digest" in item or ".sha256" in item or script_name in item:
            continue
        path = join(basedir, item)
        if isfile(path):
            # Callers need both the full path and the bare file name.
            yield (path, item)
def sha256sum(file_name, block_size=None):
    """Return the SHA-256 checksum of a file as a hexadecimal string.

    The file is opened in binary mode and fed to the hash in chunks of at
    most ``block_size`` bytes.

    :param file_name: path of the file to hash.
    :param block_size: maximum number of bytes per read; 4096 is used when
        it is ``None``.  A negative value makes a single ``read`` call
        return everything up to end-of-file.
    :raises ValueError: if ``block_size`` is 0.  Reading zero bytes returns
        no data, which would silently produce the digest of empty input
        instead of the digest of the file.
    """
    if block_size is None:
        block_size = 4096
    if block_size == 0:
        raise ValueError("block_size must not be 0")
    checksum = sha256()
    with open(file_name, "rb") as file_handle:
        # iter() with a sentinel keeps calling read() until it returns
        # b"", which is how a binary file signals end-of-file.
        for block in iter(lambda: file_handle.read(block_size), b""):
            checksum.update(block)
    return checksum.hexdigest()
def sha256sums(basedir=None, block_size=None):
    """Yield one ``(<sha256sum>, <file_name>)`` tuple per file in basedir.

    The files come from ``list_files(basedir=basedir)``; each one is hashed
    with ``sha256sum`` using the given ``block_size``.  The yielded name is
    the bare file name, while the full path is what gets hashed.
    """
    for entry in list_files(basedir=basedir):
        full_path, short_name = entry
        digest = sha256sum(full_path, block_size=block_size)
        yield digest, short_name
def create_sha256_digest(basedir=None,
                         block_size=None,
                         outputdir=None,
                         individual=False):
    """Create the SHA-256 hash file(s) for the files found in ``basedir``.

    Every written line has the form ``<hexdigest> *<file name>``.

    :param basedir: directory whose files are hashed; passed on to
        ``sha256sums``.
    :param block_size: read chunk size; passed on to ``sha256sums``.
    :param outputdir: directory the hash file(s) are written to; the
        current working directory is used when it is ``None``.
    :param individual: when false (the default), a single digest file
        named ``sha256-digest_<YYYYmmdd-HHMMSS>`` lists every file; when
        true, one ``<file name>.sha256`` file is written per hashed file.
    """
    if outputdir is None:
        outputdir = getcwd()
    # Truthiness rather than ``is False``: a falsy non-bool such as 0 or
    # None must select the single-digest mode as well.
    if not individual:
        # One timestamped digest file covering every file.
        hash_file_name = strftime("sha256-digest_%Y%m%d-%H%M%S")
        hash_file_path = join(outputdir, hash_file_name)
        with open(hash_file_path, "w") as file_handle:
            for checksum, file_name in sha256sums(basedir, block_size):
                file_handle.write(" *".join((checksum, file_name)) + "\n")
    else:
        # One ".sha256" file per hashed file.
        for checksum, file_name in sha256sums(basedir, block_size):
            hash_file_name = file_name + ".sha256"
            hash_file_path = join(outputdir, hash_file_name)
            with open(hash_file_path, "w") as file_handle:
                file_handle.write(" *".join((checksum, file_name)) + "\n")
if __name__ == "__main__":
    # Command-line entry point: parse the options and write the hash file(s).
    from argparse import ArgumentParser, RawDescriptionHelpFormatter

    DESCRIPTION = "Creates a SHA-256 digest of files in a directory"
    # Shown verbatim after the option list (RawDescriptionHelpFormatter
    # keeps the line breaks of the description and epilog).
    EPILOG = """\
example usage by developer
--------------------------
ls
hello-world.txt sha256digest.py
cat hello-world.txt
Hello, World!
python sha256digest.py -i
ls
hello-world.txt hello-world.txt.sha256 sha256digest.py
cat hello-world.txt.sha256
c98c24b677eff44860afea6f493bbaec5bb1c4cbb209c6fc2bbb47f66ff2ad31 *hello-world.txt
example usage by end-user
-------------------------
ls
hello-world.txt hello-world.txt.sha256
sha256sum -c hello-world.txt.sha256
hello-world.txt: OK
"""

    PARSER = ArgumentParser(
        prog="sha256digest.py",
        description=DESCRIPTION,
        epilog=dedent(EPILOG),
        formatter_class=RawDescriptionHelpFormatter,
    )

    # (short flag, long flag, settings) for each option, in the order they
    # are registered and therefore listed in the help output.
    OPTIONS = (
        ("-i", "--individual", {
            "default": False,
            "action": "store_true",
            "help": "outputs one hash file per file in folder",
        }),
        ("-d", "--directory", {
            "default": None,
            "type": str,
            "help": "path to the folder containing the files",
        }),
        ("-b", "--blocksize", {
            "default": None,
            "type": int,
            "help": "read files in chunks less than BLOCKSIZE bytes",
        }),
        ("-o", "--outputdir", {
            "default": None,
            "type": str,
            "help": "output directory for sha256 digest or files",
        }),
    )
    for short_flag, long_flag, settings in OPTIONS:
        # Every option is optional; the defaults are resolved downstream.
        PARSER.add_argument(short_flag, long_flag, required=False, **settings)

    ARGUMENTS = PARSER.parse_args()
    create_sha256_digest(
        basedir=ARGUMENTS.directory,
        block_size=ARGUMENTS.blocksize,
        outputdir=ARGUMENTS.outputdir,
        individual=ARGUMENTS.individual,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment