Skip to content

Instantly share code, notes, and snippets.

@aaugustin aaugustin/compression.py

Last active Dec 30, 2018
Embed
What would you like to do?
WebSocket compression benchmark
#!/usr/bin/env python
import getpass
import json
import pickle
import subprocess
import sys
import time
import zlib
# Path of the pickle file that corpus() writes and benchmark() reads.
CORPUS_FILE = "corpus.pkl"
def _corpus():
    """Download every commit reachable from HEAD through the GitHub API.

    Prompts for an OAuth token, resolves HEAD of the git checkout in the
    current directory, then walks the parent links breadth-first, fetching
    each commit's JSON description with ``curl``.

    Returns:
        A list of raw response bodies (bytes), one per commit, in the order
        in which they were fetched.

    Raises:
        subprocess.CalledProcessError: if ``git`` or ``curl`` exits non-zero.
        KeyError: if a response body has no ``"parents"`` entry.
    """
    # Local import so this function does not depend on a file-level change.
    from collections import deque

    token = getpass.getpass("OAuth Token? ")
    auth_header = f"Authorization: token {token}"
    commit_url = "https://api.github.com/repos/aaugustin/websockets/git/commits/"

    # Argument lists (no shell) so the token is passed to curl verbatim and
    # can never be interpreted by a shell.
    head = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip()

    commits = []
    todo = deque([head])
    # Every sha that was ever queued, so a commit reachable through several
    # children is fetched only once.
    seen = {head}

    while todo:
        sha = todo.popleft()
        commit = subprocess.check_output(["curl", "-H", auth_header, commit_url + sha])
        commits.append(commit)
        for parent in json.loads(commit)["parents"]:
            parent_sha = parent["sha"]
            if parent_sha not in seen:
                seen.add(parent_sha)
                todo.append(parent_sha)
        time.sleep(1)  # rate throttling

    return commits
def corpus():
    """Fetch the commit corpus and store it as a pickle in CORPUS_FILE."""
    # Fetch first: the output file is only opened (and truncated) once the
    # download has completed.
    commits = _corpus()
    with open(CORPUS_FILE, "wb") as out:
        pickle.dump(commits, out)
def _benchmark(data):
size = {}
duration = {}
for wbits in range(9, 16):
size[wbits] = {}
duration[wbits] = {}
for memLevel in range(1, 10):
encoder = zlib.compressobj(wbits=-wbits, memLevel=memLevel)
encoded = []
t0 = time.perf_counter()
for item in data:
if isinstance(item, str):
item = item.encode('utf-8')
# Taken from PerMessageDeflate.encode
item = encoder.compress(item) + encoder.flush(zlib.Z_SYNC_FLUSH)
if item.endswith(b"\x00\x00\xff\xff"):
item = item[:-4]
encoded.append(item)
t1 = time.perf_counter()
size[wbits][memLevel] = sum(len(item) for item in encoded)
duration[wbits][memLevel] = t1 - t0
raw_size = sum(len(item) for item in data)
print("=" * 79)
print("Compression ratio")
print("=" * 79)
print("\t".join(["wb \\ ml"] + [str(memLevel) for memLevel in range(1, 10)]))
for wbits in range(9, 16):
print(
"\t".join(
[str(wbits)]
+ [
f"{100 * (1 - size[wbits][memLevel] / raw_size):.1f}%"
for memLevel in range(1, 10)
]
)
)
print("=" * 79)
print()
print("=" * 79)
print("CPU time")
print("=" * 79)
print("\t".join(["wb \\ ml"] + [str(memLevel) for memLevel in range(1, 10)]))
for wbits in range(9, 16):
print(
"\t".join(
[str(wbits)]
+ [
f"{1000 * duration[wbits][memLevel]:.1f}ms"
for memLevel in range(1, 10)
]
)
)
print("=" * 79)
print()
print("=" * 79)
print("Size vs. 15 / 8")
print("=" * 79)
print("\t".join(["wb \\ ml"] + [str(memLevel) for memLevel in range(1, 10)]))
for wbits in range(9, 16):
print(
"\t".join(
[str(wbits)]
+ [
f"{100 * (size[wbits][memLevel] / size[15][8] - 1):.1f}%"
for memLevel in range(1, 10)
]
)
)
print("=" * 79)
print()
print("=" * 79)
print("Time vs. 15 / 8")
print("=" * 79)
print("\t".join(["wb \\ ml"] + [str(memLevel) for memLevel in range(1, 10)]))
for wbits in range(9, 16):
print(
"\t".join(
[str(wbits)]
+ [
f"{100 * (duration[wbits][memLevel] / duration[15][8] - 1):.1f}%"
for memLevel in range(1, 10)
]
)
)
print("=" * 79)
print()
def benchmark():
    """Load the pickled corpus from CORPUS_FILE and run the benchmark on it."""
    with open(CORPUS_FILE, "rb") as source:
        commits = pickle.load(source)
    # The file is closed before the (comparatively long) benchmark starts.
    _benchmark(commits)
# Explicit command table: only the two documented entry points can be run.
# Looking the name up in globals() would also accept imported modules,
# constants and private helpers, which then fail with a TypeError instead of
# showing the usage message.
_COMMANDS = {"corpus": corpus, "benchmark": benchmark}

try:
    run = _COMMANDS[sys.argv[1]]
except (KeyError, IndexError):
    # KeyError: unknown command; IndexError: no command given.
    print(f"Usage: {sys.argv[0]} [corpus|benchmark]")
else:
    run()
Running benchmark @ 4034bc768f1adec08274ef28f62ea6e401d4e88e
===============================================================================
Compression ratio
===============================================================================
wb \ ml 1 2 3 4 5 6 7 8 9
9 49.2% 52.8% 54.7% 56.2% 56.2% 56.2% 56.2% 56.2% 56.2%
10 62.0% 64.0% 65.7% 65.8% 65.8% 65.8% 65.8% 65.8% 65.8%
11 79.6% 80.9% 81.1% 81.1% 81.1% 81.1% 81.1% 81.1% 81.1%
12 80.6% 81.7% 81.8% 81.8% 81.8% 81.8% 81.8% 81.8% 81.8%
13 81.1% 82.0% 82.1% 82.1% 82.1% 82.1% 82.1% 82.1% 82.1%
14 81.6% 82.2% 82.3% 82.3% 82.3% 82.3% 82.3% 82.3% 82.3%
15 82.0% 82.5% 82.5% 82.6% 82.6% 82.6% 82.6% 82.6% 82.6%
===============================================================================
===============================================================================
CPU time
===============================================================================
wb \ ml 1 2 3 4 5 6 7 8 9
9 47.6ms 35.8ms 28.3ms 23.8ms 25.1ms 28.6ms 39.8ms 57.8ms 95.4ms
10 40.3ms 32.4ms 25.3ms 24.2ms 23.7ms 25.5ms 30.0ms 40.3ms 60.9ms
11 24.2ms 19.5ms 18.7ms 17.3ms 18.0ms 17.8ms 19.6ms 23.3ms 32.8ms
12 26.6ms 20.0ms 19.5ms 18.1ms 17.3ms 17.2ms 18.0ms 20.1ms 24.8ms
13 35.0ms 25.2ms 23.4ms 20.8ms 19.6ms 19.8ms 19.3ms 20.2ms 23.4ms
14 48.8ms 34.6ms 30.5ms 24.0ms 21.7ms 20.3ms 20.2ms 19.1ms 21.1ms
15 64.6ms 44.2ms 39.9ms 29.9ms 26.1ms 24.1ms 21.8ms 21.4ms 23.8ms
===============================================================================
===============================================================================
Size vs. 15 / 8
===============================================================================
wb \ ml 1 2 3 4 5 6 7 8 9
9 191.7% 171.1% 160.0% 151.8% 151.6% 151.6% 151.6% 151.6% 151.6%
10 118.5% 107.0% 97.2% 96.6% 96.5% 96.5% 96.5% 96.5% 96.5%
11 16.9% 9.8% 8.7% 8.5% 8.5% 8.5% 8.5% 8.5% 8.5%
12 11.3% 5.2% 4.5% 4.4% 4.4% 4.4% 4.4% 4.4% 4.4%
13 8.3% 3.5% 2.9% 2.8% 2.8% 2.8% 2.8% 2.8% 2.8%
14 5.7% 2.1% 1.6% 1.5% 1.5% 1.5% 1.5% 1.5% 1.5%
15 3.5% 0.6% 0.3% 0.1% 0.0% 0.0% 0.0% 0.0% 0.0%
===============================================================================
===============================================================================
Time vs. 15 / 8
===============================================================================
wb \ ml 1 2 3 4 5 6 7 8 9
9 122.8% 67.6% 32.6% 11.2% 17.6% 34.1% 86.2% 170.4% 346.5%
10 88.7% 51.5% 18.2% 13.3% 11.1% 19.5% 40.2% 88.7% 184.9%
11 13.4% -8.8% -12.5% -18.8% -15.5% -16.6% -8.3% 9.2% 53.5%
12 24.6% -6.4% -8.8% -15.5% -18.9% -19.5% -16.0% -6.2% 16.0%
13 63.9% 17.8% 9.5% -2.9% -8.0% -7.5% -9.6% -5.7% 9.6%
14 128.4% 62.1% 42.9% 12.5% 1.4% -5.0% -5.3% -10.8% -1.4%
15 202.4% 107.0% 86.9% 39.8% 22.0% 12.9% 2.1% 0.0% 11.4%
===============================================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.