Skip to content

Instantly share code, notes, and snippets.

@HalCanary
Last active November 20, 2020 15:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save HalCanary/9abb045bf047da118c2467266bbeab0b to your computer and use it in GitHub Desktop.
Save HalCanary/9abb045bf047da118c2467266bbeab0b to your computer and use it in GitHub Desktop.
#! /usr/bin/env python
# Copyright 2019 Google LLC.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
Given an index file where each line is of the format:
HASH_NAME HASH_VALUE URL PATH
Where:
* HASH_NAME is one of sha1, sha224, sha384, sha256, sha512, or md5.
* HASH_VALUE is a string of characters 0-9a-f, the result of applying
HASH_NAME.
* URL is a valid URL that returns data with the given hash value. If the
string `{}` is in the URL, it is to be replaced with HASH_VALUE.
* PATH is a file path relative to the directory containing the index file.
This script checks whether each PATH is up to date, i.e. whether the file
exists and its HASH_NAME digest equals HASH_VALUE. If not, it attempts to
download the file from URL and verifies the result. Unless the environment
variable SYNC_FILES_QUIET is set, it prints each entry to stderr as it is
processed.
'''
import hashlib
import os
import sys
if sys.version_info >= (3,0,0):
from urllib.request import urlretrieve
else:
from urllib import urlretrieve
def hasher(name, p):
    '''Return the hex digest of the file at `p`, or None if it does not exist.

    name: hash algorithm name accepted by hashlib.new(), e.g. 'sha256'.
    p: path of the file to hash.

    The file is read in binary mode in 4096-byte chunks, so the whole file
    is never held in memory at once.
    '''
    if not os.path.exists(p):
        return None
    m = hashlib.new(name)
    with open(p, 'rb') as f:
        # The sentinel must be bytes: a file opened with 'rb' returns b'' at
        # EOF, and on Python 3 b'' != '', so a text sentinel would never
        # match and the loop would spin forever.  b'' is also correct on
        # Python 2, where it is the same object type as ''.
        for chunk in iter(lambda: f.read(4096), b''):
            m.update(chunk)
    return m.hexdigest()
def update(hashname, checksum, uri, path):
    '''Make sure the file at `path` has the expected checksum.

    hashname: hash algorithm name passed to hasher().
    checksum: expected hex digest of the file.
    uri: URL to download the file from when it is missing or stale.
    path: destination file path; missing parent directories are created.

    If the existing file already matches `checksum`, nothing is done.
    Otherwise the file is fetched with urlretrieve() and hashed again.

    Raises AssertionError if the downloaded file still does not match
    `checksum`; the mismatching file is removed before raising.
    '''
    if hasher(hashname, path) == checksum:
        return
    directory = os.path.dirname(path)
    if directory and not os.path.exists(directory):
        os.makedirs(directory)
    urlretrieve(uri, path)
    actual = hasher(hashname, path)
    if actual != checksum:
        # Do not leave a corrupt or unexpected file behind.
        os.remove(path)
        # Raise explicitly instead of `assert False`: assert statements are
        # stripped under `python -O`, which would make this failure silent.
        raise AssertionError(
            '{}: expected {} {}, got {}'.format(
                uri, hashname, checksum, actual))
def parse_index_file(filepath):
    '''Yield (hashname, checksum, uri, path) for each entry of an index file.

    filepath: path of an index file whose non-blank lines have the form
        HASH_NAME HASH_VALUE URL PATH
    separated by whitespace.

    Every occurrence of the literal string `{}` in URL is replaced with
    HASH_VALUE.  PATH is joined onto the directory containing the index
    file.  Blank or whitespace-only lines are skipped.

    Raises ValueError if a non-blank line does not have exactly four
    whitespace-separated fields.
    '''
    directory = os.path.dirname(filepath)
    with open(filepath) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            hashname, checksum, uri, path = fields
            # Plain substitution rather than str.format(): URLs may contain
            # other brace sequences that format() would reject.
            yield (hashname, checksum,
                   uri.replace('{}', checksum),
                   os.path.join(directory, path))
def main(argv, verbose):
    '''Process every entry of every index file named in `argv`.

    argv: iterable of index file paths.
    verbose: when true, a line describing each entry is written to stderr
        before that entry is passed to update().
    '''
    for index_path in argv:
        for algo, digest, url, dest in parse_index_file(index_path):
            if verbose:
                message = '{}:{}, {} -> {}\n'.format(algo, digest, url, dest)
                sys.stderr.write(message)
            update(algo, digest, url, dest)
if __name__ == '__main__':
    # Validate the command line explicitly rather than with `assert`:
    # asserts are stripped under `python -O`, which would let the script
    # exit successfully without doing anything when no index file is given.
    if len(sys.argv) < 2:
        sys.stderr.write(
            'usage: {} INDEX_FILE [INDEX_FILE ...]\n'.format(sys.argv[0]))
        sys.exit(1)
    # Entries are echoed to stderr unless SYNC_FILES_QUIET is set.
    main(sys.argv[1:], 'SYNC_FILES_QUIET' not in os.environ)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment