-
-
Save catlee/80e4abae57d98180b755838ddbda6e04 to your computer and use it in GitHub Desktop.
Analyze compression formats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from pathlib import Path | |
import logging | |
from contextlib import contextmanager | |
import shutil | |
import tempfile | |
import subprocess | |
import time | |
import requests | |
log = logging.getLogger(__name__) | |
class Timer:
    """Reusable context manager that records how long each ``with`` block takes.

    Every pass through ``with timer:`` appends one elapsed duration, in
    seconds, to ``results``, so a single instance can collect several
    measurements of the same operation.
    """

    def __init__(self):
        # Elapsed seconds, one entry per completed ``with`` block.
        self.results = []
        # Clock reading captured when the current block was entered.
        self.start = 0

    def __enter__(self):
        # perf_counter() is monotonic and high resolution, so a measurement
        # cannot go negative or jump if the system clock is adjusted mid-run.
        self.start = time.perf_counter()
        # Returning self makes ``with Timer() as t:`` bind the timer.
        return self

    def __exit__(self, *args, **kwargs):
        # The duration is recorded even if the block raised; returning None
        # lets any exception propagate to the caller.
        self.results.append(time.perf_counter() - self.start)
@contextmanager
def tmpdir():
    """Yield a freshly created temporary directory as a ``Path``.

    The directory and everything inside it are deleted when the ``with``
    block ends, whether it finishes normally or raises.
    """
    scratch = Path(tempfile.mkdtemp())
    try:
        yield scratch
    finally:
        shutil.rmtree(scratch)
def download_file(url, dest):
    """Download ``url`` and store the response body at ``dest``.

    Args:
        url: Address to fetch with an HTTP GET.
        dest: ``pathlib.Path`` of the file to create or overwrite.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never saved as if it were the archive.
    """
    # Write to a sibling ".part" file and rename at the end, so an interrupted
    # download never leaves a truncated file at ``dest`` that a later
    # "already downloaded" check would accept.
    partial = dest.with_name(dest.name + ".part")
    # stream=True avoids holding the whole (potentially very large) body in
    # memory; the context manager releases the connection when done.
    with requests.get(url, stream=True) as resp:
        resp.raise_for_status()
        with partial.open("wb") as out:
            for chunk in resp.iter_content(chunk_size=1024 * 1024):
                out.write(chunk)
    partial.replace(dest)
def unpack(f, d):
    """Unpack archive f into directory d.

    The extraction tool is chosen from the file name: ``unzip`` for ``.zip``,
    ``tar`` for ``.tar``, ``.tgz`` and ``.tar.<anything>``, and ``7z`` for
    ``.dmg`` and ``.exe``.

    Args:
        f: ``pathlib.Path`` of the archive to extract.
        d: ``pathlib.Path`` of the destination directory; created if missing
            (its parent must already exist).

    Raises:
        ValueError: If the file name matches none of the supported types.
        subprocess.CalledProcessError: If the extraction tool exits non-zero.
    """
    log.debug("Unpacking %s into %s", f, d)
    d.mkdir(exist_ok=True)
    if f.suffix == ".zip":
        subprocess.run(["unzip", "-q", f.resolve()], cwd=d, check=True)
    # The slice is safe for names with fewer than two suffixes (e.g.
    # "image.dmg"); indexing suffixes[-2] there would raise IndexError before
    # the remaining branches could be considered.
    elif f.suffix in (".tar", ".tgz") or f.suffixes[-2:-1] == [".tar"]:
        subprocess.run(["tar", "xf", f.resolve()], cwd=d, check=True)
    elif f.suffix in (".dmg", ".exe"):
        # 7z's output is discarded: stdout goes to DEVNULL and stderr is
        # merged into it.
        subprocess.run(
            ["7z", "x", f.resolve()],
            cwd=d,
            check=True,
            stderr=subprocess.STDOUT,
            stdout=subprocess.DEVNULL,
        )
    else:
        raise ValueError("Unsupported archive type: %s" % f.suffixes)
def tar_up(d, f, compress_command):
    """Tar up directory d into a tarball f using compress_command, which should
    produce its compressed output on stdout.

    ``tar`` runs in ``d``'s parent directory and archives ``d.name``, so the
    entries in the archive are relative to that parent. Its output is piped
    into ``compress_command``, whose stdout is written to ``f``.

    Args:
        d: ``pathlib.Path`` of the directory to archive.
        f: ``pathlib.Path`` of the output file; created or truncated.
        compress_command: Argument list for a filter that reads stdin and
            writes the compressed stream to stdout.

    Raises:
        OSError: "Compression failed" if the compressor exits non-zero,
            otherwise "Tarring failed" if tar exits non-zero.
    """
    # The with-block guarantees the output handle is flushed and closed
    # before this function returns, including on errors.
    with f.open("wb") as out:
        tar_proc = subprocess.Popen(
            ["tar", "c", d.name], cwd=d.parent, stdout=subprocess.PIPE
        )
        try:
            compress_proc = subprocess.Popen(
                compress_command, stdin=tar_proc.stdout, stdout=out
            )
        except Exception:
            # The compressor could not be started; do not leave tar running
            # with nobody reading its output.
            tar_proc.kill()
            tar_proc.wait()
            raise
        finally:
            # Drop the parent's copy of the pipe's read end. Otherwise, if the
            # compressor exits early, tar never sees a broken pipe and blocks
            # forever once the pipe buffer fills.
            tar_proc.stdout.close()
        compress_proc.wait()
        tar_proc.wait()
    if compress_proc.returncode != 0:
        raise OSError("Compression failed")
    if tar_proc.returncode != 0:
        raise OSError("Tarring failed")
def analyze_formats(f, attempts=3, methods=None):
    """Benchmark several compressors on the contents of archive ``f``.

    The archive is unpacked once into a temporary directory. For each method
    the unpacked tree is re-tarred and compressed ``attempts`` times, then the
    resulting tarball is unpacked ``attempts`` times, timing every run.

    Args:
        f: ``pathlib.Path`` of the archive to analyze; must be a type that
            ``unpack`` supports.
        attempts: Number of timed runs per method, for both compression and
            decompression. Must be at least 1.
        methods: Iterable of ``(label, compress_command, suffix)`` tuples.
            ``compress_command`` is an argument list that reads a tar stream
            on stdin and writes compressed data to stdout; ``suffix`` is the
            extension given to the output so ``unpack`` can recognise it.
            Defaults to the built-in xz/zstd/gzip/bzip2 set.

    Returns:
        A dict mapping each method label to
        ``{"compress_time", "size", "decompress_time"}``: the fastest
        compression and decompression times in seconds, and the smallest
        output size in bytes.

    Raises:
        ValueError: If ``attempts`` is less than 1.
    """
    if attempts < 1:
        # With zero runs there would be nothing to take the minimum of.
        raise ValueError("attempts must be at least 1, got %r" % (attempts,))
    if methods is None:
        methods = [
            ("xz (default)", ["xz", "-T0", "-c"], ".tar.xz"),
            ("xz (max)", ["xz", "-T0", "-9e", "-c"], ".tar.xz"),
            ("zstd (default)", ["zstd", "-T0", "-c"], ".tar.zst"),
            ("zstd (max)", ["zstd", "-T0", "-19", "-c"], ".tar.zst"),
            ("gzip (default)", ["gzip", "-c"], ".tar.gz"),
            ("gzip (max)", ["gzip", "-9", "-c"], ".tar.gz"),
            ("bzip2 (default)", ["bzip2", "-c"], ".tar.bz2"),
        ]
    results = {}
    # First unpack it
    with tmpdir() as d:
        unpack_dir = d / "unpacked"
        log.debug("unpacking %s into %s", f, unpack_dir)
        unpack(f, unpack_dir)

        for name, compress_command, suffix in methods:
            t = Timer()
            dest = d / f"compressed{suffix}"
            log.info("testing method %s", name)
            log.debug(
                "%s: compressing %s into %s using %s",
                name,
                unpack_dir,
                dest,
                compress_command,
            )
            sizes = []
            for i in range(attempts):
                # Start every run from a clean slate so a previous output
                # cannot influence the measurement.
                if dest.exists():
                    dest.unlink()
                with t:
                    log.debug("compress attempt %d/%d", i + 1, attempts)
                    tar_up(unpack_dir, dest, compress_command)
                sizes.append(dest.stat().st_size)
            log.debug("compression times: %s", t.results)
            log.info("best compression time: %.2f", min(t.results))
            log.debug("sizes: %s", sizes)
            if len(set(sizes)) != 1:
                log.warning("output size is non-deterministic!")
            results[name] = {"compress_time": min(t.results), "size": min(sizes)}

            # Time decompression of the last tarball produced above; each run
            # extracts into its own throwaway directory.
            t = Timer()
            for i in range(attempts):
                with tmpdir() as d1:
                    with t:
                        log.debug("decompress attempt %d/%d", i + 1, attempts)
                        unpack(dest, d1)
            log.debug("decompression times: %s", t.results)
            log.info("best decompression time: %.2f", min(t.results))
            results[name]["decompress_time"] = min(t.results)
    return results
def calc_fitness(
    compress_time,
    decompress_time,
    size,
    *,
    transfer_speed=50_000_000,
    fetch_weight=20,
):
    """Combine compression cost, decompression cost and size into one score.

    The score is ``compress_time + fetch_weight * (decompress_time +
    size / transfer_speed)``. It is a weighted sum of times, so presumably a
    lower score means a better format (NOTE(review): confirm the intended
    ranking direction with the caller).

    Args:
        compress_time: Seconds needed to compress.
        decompress_time: Seconds needed to decompress.
        size: Compressed size in bytes.
        transfer_speed: Assumed transfer rate in bytes per second; the default
            is 50 MB/s.
        fetch_weight: Multiplier applied to the decompress-plus-transfer
            cost relative to the compression cost; presumably models one
            compression being consumed many times (TODO confirm).

    Returns:
        The weighted total as a number of seconds.
    """
    transfer_time = size / transfer_speed
    return compress_time + fetch_weight * (decompress_time + transfer_time)
if __name__ == "__main__":
    logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)

    # Downloaded packages are cached in a "data" directory next to this
    # script, so repeated runs do not fetch them again.
    DATA_DIR = (Path(__file__).parent / "data").resolve()
    DATA_DIR.mkdir(exist_ok=True)

    # Sample artifacts to benchmark, covering zip, tar.gz, tar.bz2, exe and dmg.
    PACKAGES = [
        "https://archive.mozilla.org/pub/firefox/nightly/2019/05/2019-05-29-21-52-51-mozilla-central/mozharness.zip",
        "https://archive.mozilla.org/pub/firefox/nightly/2019/05/2019-05-29-21-52-51-mozilla-central/firefox-69.0a1.en-US.win64.common.tests.tar.gz",
        "https://archive.mozilla.org/pub/firefox/nightly/2019/05/2019-05-29-21-52-51-mozilla-central/firefox-69.0a1.en-US.win64.installer.exe",
        "https://archive.mozilla.org/pub/firefox/nightly/2019/05/2019-05-29-21-52-51-mozilla-central/firefox-69.0a1.en-US.linux-x86_64.web-platform.tests.tar.gz",
        "https://archive.mozilla.org/pub/firefox/nightly/2019/05/2019-05-29-21-52-51-mozilla-central/firefox-69.0a1.en-US.mac.crashreporter-symbols.zip",
        "https://archive.mozilla.org/pub/firefox/nightly/2019/05/2019-05-29-21-52-51-mozilla-central/firefox-69.0a1.en-US.mac.dmg",
        "https://archive.mozilla.org/pub/firefox/nightly/2019/05/2019-05-29-21-52-51-mozilla-central/firefox-69.0a1.en-US.win64.zip",
        "https://archive.mozilla.org/pub/firefox/nightly/2019/05/2019-05-29-21-52-51-mozilla-central/firefox-69.0a1.en-US.linux-x86_64.tar.bz2",
    ]

    for url in PACKAGES:
        # The last path component of the URL is used as the local file name.
        filename = url.rsplit("/", 1)[-1]
        archive = DATA_DIR / filename
        if not archive.exists():
            log.info("Downloading %s to %s", url, archive)
            download_file(url, archive)

        log.info("Analyzing %s", archive.name)
        per_method = analyze_formats(archive)

        # Score every method, then report from highest score to lowest.
        scored = []
        for method, measurements in per_method.items():
            scored.append((method, calc_fitness(**measurements)))
        scored.sort(key=lambda pair: pair[1], reverse=True)

        log.info("FITNESS for %s:", archive.name)
        for method, score in scored:
            log.info("%s: %.2f", method, score)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment