Skip to content

Instantly share code, notes, and snippets.

@dvarrazzo
Created February 1, 2020 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dvarrazzo/527287ca59c13b68ea6d484c04c1a310 to your computer and use it in GitHub Desktop.
Save dvarrazzo/527287ca59c13b68ea6d484c04c1a310 to your computer and use it in GitHub Desktop.
Stable hash of Dockerfile content
#!/usr/bin/env python3
"""Emit a hash on stdout of a Dockerfile and the files added by it.

- Dockerfile comments don't affect the hash value.
- If a directory is added by the Dockerfile, only the git-versioned files are
  included in the hash.
- The hash of a single file matches the md5sum result.
"""
import os
import sys
import hashlib
import subprocess as sp
from glob import glob
from dockerfile_parse import DockerfileParser
import logging
# Configure logging once at import time: INFO threshold and a
# "timestamp level message" line format.
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
# The whole script logs through the root logger.
logger = logging.getLogger()
def main():
    """Print the hash of each Dockerfile and of the local files it adds.

    For every Dockerfile named on the command line, print one
    ``<md5> <path>`` line for the Dockerfile itself (comments excluded) and
    one such line for each file referenced by its ADD/COPY instructions.
    ``COPY --from=...`` instructions are skipped, as they don't read from the
    build context. Sources are resolved relative to ``--context`` if given,
    otherwise relative to the Dockerfile's directory.

    Raises:
        ScriptError: if an ADD/COPY instruction has no source, or a source
            can't be found or is neither a file nor a directory.
    """
    opt = parse_cmdline()
    logger.setLevel(getattr(logging, opt.loglevel))

    for dfn in opt.files:
        with open(dfn) as f:
            dfp = DockerfileParser(fileobj=f)
            dfcont = dfp.structure

        logger.debug("hashing dockerfile %s", dfn)
        print("%s %s" % (hash_dockerfile(dfcont), dfn))

        ctx = opt.context or os.path.dirname(dfn)
        for stmt in dfcont:
            if stmt['instruction'] not in ('ADD', 'COPY'):
                continue
            if stmt['instruction'] == 'COPY' and '--from' in stmt['value']:
                continue

            logger.debug("%s: %s", stmt['instruction'], stmt['value'])
            for path in _resolve_sources(ctx, stmt):
                _print_path_hashes(path)


def _resolve_sources(ctx, stmt):
    """Return the existing paths named as sources by an ADD/COPY statement."""
    # Instruction options (e.g. --chown=user:group) are not source paths.
    values = [v for v in stmt['value'].split() if not v.startswith('--')]
    # The last value is the destination: at least one source must precede it.
    if len(values) < 2:
        raise ScriptError(
            "can't see what to add here: %s %s"
            % (stmt['instruction'], stmt['value'])
        )

    paths = []
    for value in values[:-1]:
        path = os.path.join(ctx, value)
        if os.path.exists(path):
            paths.append(path)
            continue

        # Not a literal path: try it as a glob pattern, in a stable order.
        matches = sorted(glob(path))
        if not matches:
            raise ScriptError(
                "Can't find this file: %s %s"
                % (stmt['instruction'], stmt['value'])
            )
        paths.extend(matches)

    return paths


def _print_path_hashes(path):
    """Print the hash of a file, or of each git-versioned file in a dir."""
    if os.path.isfile(path):
        logger.debug("hashing file %s", path)
        print("%s %s" % (md5sum(path), path))
    elif os.path.isdir(path):
        logger.debug("hashing dir %s", path)
        tracked = sorted(
            sp.check_output(["git", "ls-files", path])
            .decode('utf8')
            .splitlines()
        )
        for fn in tracked:
            print("%s %s" % (md5sum(fn), fn))
    else:
        raise ScriptError("not a file or a dir: %s" % path)
def md5sum(*fns):
    """Return the hex MD5 digest of the concatenated content of the files.

    Args:
        *fns: paths of the files to hash, in order. They are read in binary
            mode and fed to a single digest, so the hash of one file is the
            MD5 of that file's content.

    Returns:
        The digest as a hexadecimal string.
    """
    h = hashlib.md5()
    for fn in fns:
        with open(fn, "rb") as f:
            # Read in chunks until EOF so large files aren't loaded whole.
            for block in iter(lambda: f.read(65536), b""):
                h.update(block)
    return h.hexdigest()
def hash_dockerfile(dfcont):
    """Return the hex MD5 digest of a parsed Dockerfile, ignoring comments.

    Args:
        dfcont: iterable of statements, each a mapping with at least the
            'instruction' and 'value' keys.

    Returns:
        The MD5 (hex string) of the "INSTRUCTION value" lines, joined by
        newlines and encoded as UTF-8. Statements whose instruction is
        'COMMENT' are left out, so editing comments doesn't change the hash.
    """
    data = '\n'.join(
        "%s %s" % (x['instruction'], x['value'])
        for x in dfcont
        if x['instruction'] != 'COMMENT'
    )
    h = hashlib.md5()
    h.update(data.encode('utf8'))
    return h.hexdigest()
def parse_cmdline(args=None):
    """Parse the command line and return the resulting options.

    Args:
        args: list of argument strings to parse. If None (the default) the
            arguments are taken from ``sys.argv``.

    Returns:
        The argparse namespace, with attributes ``context`` (str or None),
        ``loglevel`` (a logging level name) and ``files`` (non-empty list of
        Dockerfile paths).
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description=__doc__)
    parser.add_argument(
        '--context',
        help="Specify the build context [default: the FILE dir]",
    )
    parser.add_argument(
        '--loglevel',
        default='INFO',
        # Every choice must be the name of a level constant in `logging`.
        choices=('DEBUG', 'INFO', 'WARN', 'WARNING', 'ERROR', 'CRITICAL'),
    )
    parser.add_argument(
        'files',
        nargs='+',
        metavar='FILE',
        help="Dockerfile to parse to find dependencies",
    )

    opt = parser.parse_args(args)
    return opt
class ScriptError(Exception):
    """Controlled exception raised by the script.

    The entry point logs its message as an error, without a traceback, and
    exits with status 1.
    """
if __name__ == '__main__':
    try:
        sys.exit(main())

    except ScriptError as e:
        # Expected failure: report the message only, no traceback.
        logger.error("%s", e)
        sys.exit(1)

    except Exception:
        # Anything else is a bug: log it with the traceback.
        logger.exception("unexpected error")
        sys.exit(1)

    except KeyboardInterrupt:
        logger.info("user interrupt")
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment