Created
February 1, 2020 13:59
-
-
Save dvarrazzo/527287ca59c13b68ea6d484c04c1a310 to your computer and use it in GitHub Desktop.
Stable hash of Dockerfile content
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Emit a hash on stdout of a dockerfile and the files added by it.
- Dockerfile comments don't affect the hash value. | |
- If a directory is added by dockerfile, only the git-versioned files are | |
included in the hash. | |
- A single file hash matches md5sum result. | |
""" | |
import os | |
import sys | |
import hashlib | |
import subprocess as sp | |
from glob import glob | |
from dockerfile_parse import DockerfileParser | |
import logging | |
# The script logs through the root logger; basicConfig sets the default
# level (INFO) and the line format used for messages.
logger = logging.getLogger()
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
def main():
    """Print the hash of each Dockerfile and of the files it ADDs/COPYs.

    For every Dockerfile named on the command line, print a
    ``<md5> <name>`` line for the Dockerfile itself (comments excluded),
    then one such line for each file that its ADD/COPY instructions take
    from the build context. ``COPY --from=...`` instructions are skipped.

    Raises:
        ScriptError: if an ADD/COPY instruction names no source, or if a
            source cannot be found in the build context.
    """
    opt = parse_cmdline()
    logger.setLevel(getattr(logging, opt.loglevel))

    for dfn in opt.files:
        with open(dfn) as f:
            # The parser is handed the open file object, so read the
            # structure before the file is closed.
            dfcont = DockerfileParser(fileobj=f).structure

        logger.debug("hashing dockerfile %s", dfn)
        print("%s %s" % (hash_dockerfile(dfcont), dfn))

        # Sources are resolved relative to the build context, which
        # defaults to the directory containing the Dockerfile.
        ctx = opt.context or os.path.dirname(dfn)

        for stmt in dfcont:
            if stmt['instruction'] not in ('ADD', 'COPY'):
                continue
            if stmt['instruction'] == 'COPY' and '--from' in stmt['value']:
                continue

            logger.debug("%s: %s", stmt['instruction'], stmt['value'])
            for path in _find_sources(stmt, ctx):
                _print_path_hashes(path)


def _find_sources(stmt, ctx):
    """Return the paths under *ctx* named as sources by ADD/COPY *stmt*."""
    values = stmt['value'].split()
    # Leading ``--flag[=value]`` options (e.g. --chown=...) are not paths.
    while values and values[0].startswith('--'):
        values.pop(0)

    # The last value is the destination; at least one source must precede it.
    if len(values) < 2:
        raise ScriptError("can't see what to add here: %s" % stmt['value'])

    found = []
    for value in values[:-1]:
        path = os.path.join(ctx, value)
        if os.path.exists(path):
            found.append(path)
            continue

        # Not a literal path: try it as a glob pattern, in stable order.
        matches = sorted(glob(path))
        if not matches:
            raise ScriptError(
                "Can't find this file: %s %s"
                % (stmt['instruction'], stmt['value'])
            )
        found.extend(matches)
    return found


def _print_path_hashes(path):
    """Print the hash of file *path*, or of each git-tracked file in dir *path*."""
    if os.path.isfile(path):
        logger.debug("hashing file %s", path)
        print("%s %s" % (md5sum(path), path))
    elif os.path.isdir(path):
        logger.debug("hashing dir %s", path)
        # -z makes git emit NUL-separated, unquoted names, so unusual
        # file names can be opened as reported.
        listing = sp.check_output(["git", "ls-files", "-z", "--", path])
        tracked = sorted(n for n in listing.decode('utf8').split('\0') if n)
        for name in tracked:
            print("%s %s" % (md5sum(name), name))
    else:
        raise ScriptError("not a file or a dir: %s" % path)
def md5sum(*fns):
    """Return the hex MD5 digest of the contents of *fns*, taken in order.

    All the files feed a single digest, as if they were concatenated; each
    one is read in binary mode in 4096-byte chunks.
    """
    digest = hashlib.md5()
    for path in fns:
        with open(path, "rb") as stream:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: stream.read(4096), b""):
                digest.update(chunk)
    return digest.hexdigest()
def hash_dockerfile(dfcont):
    """Return the hex MD5 digest of a parsed Dockerfile, ignoring comments.

    *dfcont* is a sequence of mappings with 'instruction' and 'value' keys.
    Each non-COMMENT entry contributes an "INSTRUCTION value" line; the
    lines are joined with newlines and hashed as UTF-8.
    """
    lines = []
    for entry in dfcont:
        if entry['instruction'] == 'COMMENT':
            continue
        lines.append("%s %s" % (entry['instruction'], entry['value']))

    payload = '\n'.join(lines).encode('utf8')
    return hashlib.md5(payload).hexdigest()
def parse_cmdline():
    """Parse the process command line and return the argparse namespace.

    The namespace carries ``context`` (None unless given), ``loglevel``
    (one of DEBUG/INFO/WARN/ERROR, default INFO) and ``files`` (one or
    more Dockerfile paths).
    """
    import argparse

    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument(
        '--context',
        help="Specify the build context [default: the FILE dir]",
    )
    ap.add_argument(
        '--loglevel',
        choices=('DEBUG', 'INFO', 'WARN', 'ERROR'),
        default='INFO',
    )
    ap.add_argument(
        'files',
        metavar='FILE',
        nargs='+',
        help="Dockerfile to parse to find dependencies",
    )
    return ap.parse_args()
class ScriptError(Exception):
    """Expected failure of the script, reported by its message alone.

    The entry point logs these as a plain error line, whereas any other
    exception is logged with a traceback.
    """
# Script entry point: run main() and turn failures into exit status 1.
if __name__ == '__main__':
    try:
        status = main()
    except ScriptError as exc:
        # Controlled failure: the message is enough, no traceback.
        logger.error("%s", exc)
        status = 1
    except KeyboardInterrupt:
        logger.info("user interrupt")
        status = 1
    except Exception:
        # Anything else is a bug: log it with the traceback.
        logger.exception("unexpected error")
        status = 1
    sys.exit(status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment