Skip to content

Instantly share code, notes, and snippets.

@peterbe peterbe/download-svgs.py
Last active Apr 4, 2019

Embed
What would you like to do?
import hashlib
import os
import shutil
from collections import defaultdict
from glob import glob
from subprocess import check_call
import pyquery
def md5(s):
    """Return a short, stable fingerprint of the text *s*.

    The string is encoded as UTF-8, hashed with MD5, and the first seven
    hexadecimal characters of the digest are returned.
    """
    return hashlib.md5(s.encode('utf-8')).hexdigest()[:7]
# Start from an empty output directory. On the very first run the directory
# does not exist yet, which must not abort the script.
try:
    shutil.rmtree('downloaded')
except FileNotFoundError:
    pass
os.makedirs('downloaded')

doc = pyquery.PyQuery('https://developer.mozilla.org/en-US/')

# name -> list of sizes in bytes, one entry per <svg> element seen under
# that name (so repeated identical SVGs show up as several entries).
counts = defaultdict(list)
for svg in doc('svg').items():
    # Drop the first two and last two characters of the repr
    # (presumably the "[<" / ">]" wrapper PyQuery puts around the tag).
    name = str(repr(svg))[2:-2]
    html = svg.outer_html()
    # Suffix with a content hash so different SVGs with the same tag
    # description get different file names.
    name += '_' + md5(html)
    if name not in counts:
        # First time this exact SVG is seen: write it out and minify it.
        fn = 'downloaded/' + name + '.svg'
        # Build the minified name directly instead of str.replace(), which
        # would also rewrite any earlier ".svg" occurring inside the name.
        min_fn = 'downloaded/' + name + '.min.svg'
        # Write as UTF-8 explicitly, matching the encoding used for the hash.
        with open(fn, 'w', encoding='utf-8') as f:
            f.write(html)
            f.write('\n')
        # Pass argv as a list so file names containing whitespace survive.
        check_call(['svgo', fn, '-o', min_fn])
    # Record encoded bytes (not characters) so the figure is comparable with
    # the on-disk sizes reported for the compressed files.
    counts[name].append(len(html.encode('utf-8')))

for x in glob('downloaded/*.min.svg'):
    # NOTE(review): upstream zopfli documents the iteration flag as "--i#";
    # confirm that "-i100" is honoured by the installed build.
    check_call(['zopfli', '-i100', x])
    check_call(['brotli', '-9', x])
def kb(b):
    """Format a byte count *b* as kilobytes (1 KB = 1024 bytes), one decimal.

    Example: ``kb(1536)`` returns ``'1.5KB'``.
    """
    return '{:.1f}KB'.format(b / 1024)
def _file_sizes(pattern):
    """Map every path matching the glob *pattern* to its size on disk."""
    sizes = {}
    for path in glob(pattern):
        sizes[path] = os.stat(path).st_size
    return sizes


# Original markup: number of recorded sizes across all names, and their sum.
entries_before = 0
total_before = 0
for recorded in counts.values():
    entries_before += len(recorded)
    total_before += sum(recorded)
print("TOTAL BYTES BEFORE ({}): {:,} ({})".format(
    entries_before, total_before, kb(total_before)
))

# Minified files.
after = _file_sizes('downloaded/*.min.svg')
total_after = sum(after.values())
print("TOTAL BYTES AFTER ({}): {:,} ({})".format(
    len(after), total_after, kb(total_after)
))

# Minified files with a .gz suffix.
gzipped = _file_sizes('downloaded/*.min.svg.gz')
total_gzipped = sum(gzipped.values())
print("TOTAL BYTES ZOPFLI ({}): {:,} ({})".format(
    len(gzipped), total_gzipped, kb(total_gzipped)
))

# Minified files with a .br suffix.
brotlied = _file_sizes('downloaded/*.min.svg.br')
total_brotlied = sum(brotlied.values())
print("TOTAL BYTES BROTLI ({}): {:,} ({})".format(
    len(brotlied), total_brotlied, kb(total_brotlied)
))
@peterbe

This comment has been minimized.

Copy link
Owner Author

peterbe commented Apr 4, 2019

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.