@iscgar
Created June 22, 2018 18:21
Quick and dirty Rust distribution (rustup) mirror script (console output may be interleaved because of the added parallelism)
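Rough usage sketch (assuming the script is saved as, say, rustup_mirror.py -- the name is arbitrary): install the dependencies with "pip install toml requests colorama", then run "python rustup_mirror.py" (Python 2 or 3) from the directory that should hold the rustup-repo mirror tree. Each run also writes a dated .update shell script with the rm/wget commands for bringing an offline mirror in sync.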
from __future__ import print_function
import os
import sys
import re
from datetime import datetime, timedelta
import pickle
from multiprocessing import cpu_count
from multiprocessing.dummy import Pool as ThreadPool
import toml
import requests
import hashlib
import time
import colorama
from colorama import Fore
colorama.init()
if sys.version_info >= (3,):
    raw_input = input

class TargetUrl(object):
    def __init__(self, url, hash):
        super(TargetUrl, self).__init__()
        self.url = str(url)
        self.hash = str(hash)

    def __repr__(self):
        return 'TargetUrl({}, {})'.format(self.url, self.hash)

class Target(object):
    def __init__(self, nm, urls):
        super(Target, self).__init__()
        self.name = str(nm)
        self.urls = tuple(urls)

    def __str__(self):
        return self.name

    __repr__ = __str__

    def __hash__(self):
        return hash(self.name)

    # Targets are identified by name, so compare by name as well; the
    # membership tests in update_v2_channel rely on this.
    def __eq__(self, o):
        return self.name == o.name

    def __ne__(self, o):
        return self.name != o.name

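# A version string is either a bare v1 form like '1.27.0' or
# '1.28.0-beta.3', or a v2 form that appends a commit hash and date,
# e.g. '1.28.0-beta.3 (a805a2a5e 2018-06-20)'. These example values are
# illustrative; the exact shapes accepted are defined by the two regexes
# below.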
class Version(object):
    _V2_MANIFEST = re.compile(r'^\(([\da-fA-F]+)\s+(\d{4}-\d{2}-\d{2})\)$')
    _V1_MANIFEST = re.compile(r'^(\d+\.\d+\.\d+)(-.+)?$')

    def __init__(self, s):
        super(Version, self).__init__()
        s = str(s).strip()
        v = s.split(' ', 1)
        m = self._V1_MANIFEST.match(v[0].strip())
        if not m:
            raise ValueError('Unknown version format')
        if len(v) == 2:
            hm = self._V2_MANIFEST.match(v[1].strip())
            if not hm:
                raise ValueError('Unknown v2 format')
            self.hash = hm.group(1)
            self.date = datetime.strptime(hm.group(2), '%Y-%m-%d')
        self.tag = m.group(2)[1:] if m.group(2) else ''
        self.major, self.minor, self.build = tuple(
            int(_) for _ in m.group(1).split('.'))

    def __hash__(self):
        return hash((self.major, self.minor, self.build, self.tag))

    def __eq__(self, o):
        return self.major == o.major and self.minor == o.minor \
            and self.build == o.build and self.tag == o.tag

    def __ne__(self, o):
        return self.major != o.major or self.minor != o.minor \
            or self.build != o.build or self.tag != o.tag

    def __gt__(self, o):
        return self.major > o.major or self.major == o.major and (
            self.minor > o.minor or self.minor == o.minor and (
                self.build > o.build or
                self.build == o.build and self.tag > o.tag))

    def __ge__(self, o):
        return self == o or self > o

    def __lt__(self, o):
        return self.major < o.major or self.major == o.major and (
            self.minor < o.minor or self.minor == o.minor and (
                self.build < o.build or
                self.build == o.build and self.tag < o.tag))

    def __le__(self, o):
        return self == o or self < o

    def __str__(self):
        s = '{}.{}.{}{}'.format(
            self.major, self.minor, self.build,
            '' if not self.tag else '-{}'.format(self.tag))
        if hasattr(self, 'hash'):
            s += ' ({} {})'.format(self.hash, self.date.strftime('%Y-%m-%d'))
        return s

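# A package entry in the v2 channel manifest is expected to look roughly
# like the (illustrative) TOML below: a version string plus per-target
# tables with an availability flag and one or more <prefix>url/<prefix>hash
# pairs (e.g. url/hash, xz_url/xz_hash), which is what _parse walks:
#
#   [pkg.cargo]
#   version = "1.27.0 (0123abcd4 2018-06-21)"
#   [pkg.cargo.target.x86_64-unknown-linux-gnu]
#   available = true
#   url = "https://static.rust-lang.org/dist/..."
#   hash = "..."
#   xz_url = "https://static.rust-lang.org/dist/..."
#   xz_hash = "..."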
class Package(object):
    def __init__(self, nm, data):
        super(Package, self).__init__()
        self._parse(nm, data)

    def _parse(self, nm, data):
        self.name = nm
        self.version = Version(data[u'version'])
        self.targets = []
        targs = data[u'target']
        for target in filter(lambda t: targs[t][u'available'], targs):
            trg = targs[target]
            prefixes = (s[:-3] for s in trg if s.endswith(u'url'))
            self.targets.append(Target(
                target.strip(),
                (TargetUrl(
                    trg[pfx + u'url'].strip(),
                    trg[pfx + u'hash'].strip().lower()) for pfx in prefixes)
            ))

    def __str__(self):
        return 'Package(%s-%s)' % (
            self.name,
            self.version)

    def __repr__(self):
        return 'Package(%s)\n %s' % (
            self.name,
            ',\n '.join(
                '%r: %r' % (k.upper(), getattr(self, k))
                for k in dir(self)
                if not k.startswith('_')))

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, o):
        return self.name == o.name and self.version == o.version

    def __ne__(self, o):
        return self.name != o.name or self.version != o.version

    def __gt__(self, o):
        return self.name > o.name \
            or self.name == o.name and self.version > o.version

    def __ge__(self, o):
        return self.name > o.name \
            or self.name == o.name and self.version >= o.version

    def __lt__(self, o):
        return self.name < o.name \
            or self.name == o.name and self.version < o.version

    def __le__(self, o):
        return self.name < o.name \
            or self.name == o.name and self.version <= o.version

STATIC_RUST_URL = 'static.rust-lang.org'
URL_TEMPLATE = 'https://{}/{{}}'.format(STATIC_RUST_URL)
CHANNEL_MANIFEST_PATH_TEMPLATE = 'dist/channel-rust-{}.toml'
RUSTUP_MANIFEST_PATH = 'rustup/release-stable.toml'
HASH_EXT = '.sha256'
SIG_EXT = '.asc'
V1_EXTS = ('', HASH_EXT)
V2_EXTS = ('', HASH_EXT, SIG_EXT)
DEST_DIR = 'rustup-repo'
CACHED_DB_PATH = os.path.join(DEST_DIR, 'cached.db')
DIST_DIR = os.path.join(DEST_DIR, STATIC_RUST_URL)
TARGET_SKIPS = (
    'apple',
    'android',
    'emscripten',
    'wasm',
    's390x',
    'netbsd',
    'freebsd',
    'fuchsia',
    'sparc',
    'solaris',
    'redox'
)
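# Each entry below is (channel name, how much newer the remote manifest's
# Last-Modified must be than the cached copy before the channel is
# re-synced, substrings of target triples to skip when mirroring).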
CHANNELS = (
    ('stable', timedelta(seconds=1), TARGET_SKIPS),
    ('beta', timedelta(weeks=25), ()),
    ('nightly', timedelta(weeks=6), TARGET_SKIPS)
)

def _format_size(s):
    SIZES = ('B', 'KiB', 'MiB', 'GiB')
    f = 0
    s = float(s)
    while s > 1024:
        s /= 1024
        f += 1
    return (s, SIZES[f])

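# get_url_changed_date parses the Last-Modified header of a HEAD response,
# which uses the RFC 1123 date format, e.g. (illustrative value)
# 'Fri, 22 Jun 2018 10:03:52 GMT', matching the strptime pattern below.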
def get_url_changed_date(url):
    return datetime.strptime(
        requests.head(url).headers.get('Last-Modified'),
        '%a, %d %b %Y %H:%M:%S %Z')

def _download(url, out):
    print('Downloading `{}`...'.format(url))
    outdir = os.path.dirname(out)
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    r = requests.get(url, timeout=20, stream=True)
    # Surface 4xx/5xx responses as requests.HTTPError so that the
    # download loops below can handle missing files and retries.
    r.raise_for_status()
    total = int(r.headers.get('content-length'))
    with open(out, 'wb') as of:
        done = 0
        for chunk in r.iter_content(chunk_size=102400):
            of.write(chunk)
            done += len(chunk)
            print(' {:6.2f}%'.format(float(done) / total * 100), end='\r')
    print(' {:7.2f} {}'.format(*_format_size(done)))

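# download_v1_pkg fetches a package file plus its .sha256 companion and
# verifies the digest. Judging by the parsing below, hash files for .exe
# installers contain just the bare digest, while other hash files use the
# sha256sum '<digest>  <filename>' layout, e.g. (illustrative):
#   0123abcd...ef  rustup-init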
def download_v1_pkg(fname, hname, out_dir):
    def remove_v1_package():
        for f in (fname, hname):
            if f:
                os.remove(os.path.join(out_dir, f))

    while True:
        for f in (fname, hname):
            ffpath = os.path.join(out_dir, f)
            # Skip already downloaded files and continue to validation
            if os.path.isfile(ffpath):
                continue
            while True:
                try:
                    _download(
                        URL_TEMPLATE.format(f), ffpath)
                    break
                except requests.Timeout:
                    print('Timed out on {}. Retrying...'.format(f))
                    time.sleep(5)
                except requests.HTTPError as ex:
                    if ex.response.status_code == 404:
                        print('{} could not be found. Skipping.'.format(f))
                        return
                    print('HTTP error {}'.format(ex))
        print('Verifying v1 package `{}`... '.format(fname), end='')
        with open(os.path.join(
                out_dir, hname), 'rb') as hf:
            hd = hf.read()
        try:
            if fname.endswith('.exe'):
                h = hd.decode('ascii').strip()
            else:
                h, fn = tuple(
                    _.strip() for _ in hd.decode('ascii').strip().split())
                if os.path.basename(fn) != os.path.basename(fname):
                    print('{}FAILED: file name mismatch. Retrying.{}'.format(
                        Fore.YELLOW, Fore.RESET))
                    remove_v1_package()
                    continue
            h = h.lower()
        except Exception:
            print('{}FAILED: hash file parsing failed. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v1_package()
            continue
        with open(os.path.join(out_dir, fname), 'rb') as pf:
            hr = hashlib.sha256()
            for chunk in iter(lambda: pf.read(1024), b''):
                hr.update(chunk)
        if h != hr.hexdigest():
            print('{}FAILED: hash mismatch. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v1_package()
            continue
        # Everything's good
        print('{}OK{}'.format(Fore.GREEN, Fore.RESET))
        break

def download_v2_pkg(fname, expected_hash, out_dir):
    def remove_v2_package():
        for ext in V2_EXTS:
            ffext = '{}{}'.format(fname, ext)
            os.remove(os.path.join(out_dir, ffext))

    while True:
        for ext in V2_EXTS:
            ffext = '{}{}'.format(fname, ext)
            ffpath = os.path.join(out_dir, ffext)
            # Skip already downloaded files and continue to validation
            if os.path.isfile(ffpath):
                continue
            while True:
                try:
                    _download(
                        URL_TEMPLATE.format(ffext), ffpath)
                    break
                except requests.Timeout:
                    print('{}Timed out on {}. Retrying...{}'.format(
                        Fore.YELLOW, ffext, Fore.RESET))
                    time.sleep(5)
                except requests.HTTPError as ex:
                    if ex.response.status_code == 404:
                        print('{}{} could not be found. Skipping.{}'.format(
                            Fore.YELLOW, ffext, Fore.RESET))
                        return
                    print('{}HTTP error: {}{}'.format(
                        Fore.RED, ex, Fore.RESET))
        print('Verifying v2 package `{}`... '.format(
            os.path.basename(fname)), end='')
        with open(os.path.join(
                out_dir, '{}{}'.format(fname, HASH_EXT)), 'rb') as hf:
            hd = hf.read()
        try:
            h, fn = tuple(
                _.strip() for _ in hd.decode('ascii').strip().split())
            h = h.lower()
        except Exception:
            print('{}FAILED: hash file parsing failed. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v2_package()
            continue
        if fn != os.path.basename(fname):
            print('{}FAILED: file name mismatch. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v2_package()
            continue
        if expected_hash and h != expected_hash:
            print('{}FAILED: expected hash mismatch. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v2_package()
            continue
        with open(os.path.join(out_dir, fname), 'rb') as pf:
            hr = hashlib.sha256()
            for chunk in iter(lambda: pf.read(512000), b''):
                hr.update(chunk)
        if h != hr.hexdigest():
            print('{}FAILED: hash mismatch. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v2_package()
            continue
        # TODO: Verify sig
        print('{}OK{}'.format(Fore.GREEN, Fore.RESET))
        break

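# write_diff_file emits a small POSIX shell script describing the delta:
# rm -f lines for removed files, mkdir -p lines for new directories, and
# one rget (wget) line per updated file. An illustrative excerpt:
#
#   #!/bin/sh
#   alias "rget=wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 0"
#   # --- Removed ---
#   rm -f "dist/channel-rust-stable.toml"
#   # --- Updated ---
#   mkdir -p "dist"
#   rget -O "dist/channel-rust-stable.toml" "https://static.rust-lang.org/dist/channel-rust-stable.toml"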
def write_diff_file(ufname, removed, updated):
    DIFF_LINE_TEMPLATE = 'rget -O "{0}" "{1}"'

    def get_diff_line(pkg):
        url = URL_TEMPLATE.format(pkg)
        res = requests.head(url)
        if res.status_code != 200:
            return '#:{} {}'.format(res.status_code, url)
        return DIFF_LINE_TEMPLATE.format(pkg, url)

    print('Writing updated diff to {}...'.format(ufname))
    with open(ufname, 'w') as uf:
        # File header
        print('#!/bin/sh', file=uf)
        print('alias "rget=wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 0"', file=uf)
        # Removed list
        print('# --- Removed ---', file=uf)
        for pkg in removed:
            print('rm -f "{}"'.format(pkg), file=uf)
        # Updated dirs
        print('# --- Updated ---', file=uf)
        for dn in sorted(set(os.path.dirname(pkg) for pkg in updated)):
            print('mkdir -p "{}"'.format(dn), file=uf)
        # Updated list
        for i, diff_line in enumerate(
                ThreadPool(cpu_count()).imap_unordered(
                    get_diff_line, updated), 1):
            print(diff_line, file=uf)
            print('{:3d}%'.format(int(i * 100 / len(updated))), end='\r')
    print('\nDone.')

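# update_v1_channel gates on the manifest's Last-Modified header, then
# compares the advertised version against the cached one. It returns a
# (changed-date, version, targets, removed, updated) tuple, where removed
# and updated hold (file, file.sha256) pairs built via ch_fname_func.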
def update_v1_channel(manifest_path, update_delta, ch_fname_func, targets, db):
    changed = get_url_changed_date(URL_TEMPLATE.format(manifest_path))
    if changed - db['updated'] >= update_delta:
        manifest = toml.loads(
            requests.get(
                URL_TEMPLATE.format(manifest_path)).content.decode('utf-8'))
        assert manifest.get(u'schema-version', u'') == u'1', (
            "Unsupported manifest version `{}'".format(
                manifest.get(u'schema-version', u'')))
        nv = Version(manifest[u'version'])
        ov = db['version']
        if nv > ov:
            removed = []
            updated = []
            for target in db['targets']:
                fname = ch_fname_func(ov, target)
                removed.append(tuple('%s%s' % (fname, ext) for ext in V1_EXTS))
            ntargets = tuple(targets)
            for target in ntargets:
                fname = ch_fname_func(nv, target)
                updated.append(tuple('%s%s' % (fname, ext) for ext in V1_EXTS))
            return (changed, nv, ntargets, removed, updated)
    return (changed, db['version'], db['targets'], (), ())

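# update_v2_channel does the same Last-Modified gating for a v2 channel,
# then diffs each package against the cached copy: stale or skipped
# targets land in `removed`, new or re-published ones in `updated`, and
# on success the for/else returns (changed-date, packages, removed,
# updated). A version regression or a declined removal prompt breaks out
# of the loop, skipping the else clause and keeping the cached packages.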
def update_v2_channel(manifest_path, update_delta, skips, db):
    changed = get_url_changed_date(URL_TEMPLATE.format(manifest_path))
    if changed - db['updated'] >= update_delta:
        print('Updating components...')
        manifest = toml.loads(
            requests.get(
                URL_TEMPLATE.format(manifest_path)).content.decode('utf-8'))
        assert manifest.get(u'manifest-version', u'') == u'2', (
            "Unsupported manifest version `{}'".format(
                manifest.get(u'manifest-version', u'')))
        packages = {}
        removed = []
        updated = []
        for pkg, pkg_desc in manifest[u'pkg'].items():
            np = Package(pkg, pkg_desc)
            op = db['pkgs'].get(pkg, None)
            if op:
                if np < op:
                    print('Package {} went back (from {} to {})'.format(
                        pkg, op.version, np.version))
                    break
                removed_targets = tuple(
                    t for t in op.targets if t not in np.targets or
                    any(s in t.name for s in skips))
                if removed_targets:
                    print('{}WARNING{}: The following targets were removed from {}:'.format(
                        Fore.YELLOW, Fore.RESET, pkg))
                    for t in removed_targets:
                        print(' - {}'.format(t.name))
                    if raw_input('Continue? [y/N] ').strip().lower() != 'y':
                        break
                if np > op:
                    removed_targets = op.targets
                # Remove old targets
                for target in removed_targets:
                    for u in target.urls:
                        fname = u.url.replace(URL_TEMPLATE.format(''), '')
                        removed.append((fname, u.hash))
            if op and np == op:
                # Construct updated target list
                updated_targets = (
                    t for t in np.targets
                    if t not in op.targets and all(
                        s not in t.name for s in skips))
            else:
                # Add all new targets
                updated_targets = (
                    t for t in np.targets
                    if all(s not in t.name for s in skips))
            # Add updated targets
            for target in updated_targets:
                for u in target.urls:
                    fname = u.url.replace(URL_TEMPLATE.format(''), '')
                    updated.append((fname, u.hash))
            packages[pkg] = np
        else:
            return (changed, packages, removed, updated)
    return (changed, db['pkgs'], (), ())

def get_rustup_filename(ver, target):
    return 'rustup/archive/{}/{}/rustup-init{}'.format(
        str(ver), target, '.exe' if 'windows' in target else '')

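# For example (illustrative values), version 1.27.0 for the
# x86_64-pc-windows-msvc target maps to
# 'rustup/archive/1.27.0/x86_64-pc-windows-msvc/rustup-init.exe'.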
# Load db from cache
print('Loading cached db...')
try:
    with open(CACHED_DB_PATH, 'rb') as dbf:
        db = pickle.load(dbf)
except Exception:
    db = {
        'rustup': {
            'updated': datetime(1970, 1, 1),
            'version': Version('0.0.0'),
            'targets': []
        },
        'dist': {}
    }
print('Done.')

updated = []
removed = []
pool = ThreadPool(cpu_count())

print('--- Checking for channel updates...')
for channel, delta, skips in CHANNELS:
    repfile = CHANNEL_MANIFEST_PATH_TEMPLATE.format(channel)
    print('=========================')
    print(repfile)
    print('=========================')
    try:
        ch_db = db['dist'][channel]
    except KeyError:
        ch_db = db['dist'][channel] = {
            'updated': datetime(1970, 1, 1),
            'pkgs': {}
        }
    (ch_db['updated'], ch_db['pkgs'],
     ch_removed, ch_updated) = update_v2_channel(
        repfile, delta, skips, ch_db)
    if not ch_removed and not ch_updated:
        print('No changes for {}'.format(channel))
    else:
        # Add removed files to list
        for f, _ in ch_removed:
            for ext in V2_EXTS:
                removed.append('{}{}'.format(f, ext))
        # Add updated files to list
        for f, h in ch_updated:
            for ext in V2_EXTS:
                updated.append('{}{}'.format(f, ext))
        for _ in pool.imap_unordered(
                lambda t: download_v2_pkg(*t, out_dir=DIST_DIR), ch_updated):
            pass
        # Update manifest file
        for ext in V2_EXTS:
            if ch_removed:
                removed.append('{}{}'.format(repfile, ext))
            updated.append('{}{}'.format(repfile, ext))
        download_v2_pkg(repfile, None, DIST_DIR)

print('--- Checking for rustup updates...')
ru_db = db['rustup']
(ru_db['updated'], ru_db['version'], ru_db['targets'],
 ru_removed, ru_updated) = update_v1_channel(
    RUSTUP_MANIFEST_PATH, timedelta(seconds=1), get_rustup_filename,
    (t.name for t in db['dist']['stable']['pkgs']['cargo'].targets), ru_db)
if not ru_removed and not ru_updated:
    print('No changes for rustup')
else:
    # Update removed list
    for f, h in ru_removed:
        removed.append(f)
        removed.append(h)
    # Update updated list and download packages
    for f, h in ru_updated:
        updated.append(f)
        updated.append(h)
    for _ in pool.imap_unordered(
            lambda t: download_v1_pkg(*t, out_dir=DIST_DIR), ru_updated):
        pass
    # Update the manifest
    if ru_removed:
        removed.append(RUSTUP_MANIFEST_PATH)
    updated.append(RUSTUP_MANIFEST_PATH)
    _download(URL_TEMPLATE.format(RUSTUP_MANIFEST_PATH),
              os.path.join(DIST_DIR, RUSTUP_MANIFEST_PATH))

if not updated:
    print('Local db is up to date. Nothing to do.')
else:
    write_diff_file(os.path.join(DEST_DIR, '{}.update'.format(
        datetime.now().strftime('%Y%m%d-%H%M%S'))), removed, updated)

print('Updating cached db...')
with open(CACHED_DB_PATH, 'wb') as dbf:
    pickle.dump(db, dbf)
print('Done.')