Comparing JSON compression methods.

Run

make

to generate two JSON files containing the same set of data, but with one of
them compressed using Google+-style minification (attribute names dropped in
favour of a fixed positional order, whitespace and nulls stripped). Those two
files are then gzipped.

There is a 22% difference in the gzipped sizes.
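As a rough sketch of what this minification means (the object below is
hypothetical, but the transform mirrors what gen.py does): attribute names are
dropped in favour of a fixed positional order, whitespace is removed, and
nulls become empty slots.

    import json

    # Verbose form: every record repeats the long attribute names.
    user = {
        'some_very_long_name': 'name_0',
        'another_very_long_age': 0,
        'we_also_got_some_qq_numbers': [12345678, None],
        'and_the_users_phone': None,
    }
    print(json.dumps(user))
    # {"some_very_long_name": "name_0", "another_very_long_age": 0, ...}

    # Minified form: positional array, no spaces, nulls stripped to empty slots.
    values = [user['some_very_long_name'], user['another_very_long_age'],
              user['we_also_got_some_qq_numbers'], user['and_the_users_phone']]
    print(json.dumps(values).replace(' ', '').replace('null', ''))
    # ["name_0",0,[12345678,],]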
*.gz
*.json
.*.swp
import json
import random
from itertools import chain

# Deliberately verbose attribute names: this is what the minified
# variant gets to strip out.
ATTR1 = 'some_very_long_name'
ATTR2 = 'another_very_long_age'
ATTR3 = 'we_also_got_some_qq_numbers'
ATTR4 = 'and_the_users_phone'


def mk_object(uniq):
    return {
        ATTR1: 'name_%d' % uniq,
        ATTR2: uniq,
        ATTR3: [mk_number() for _ in range(random.randint(0, 10))],
        ATTR4: mk_number(),
    }


def mk_number():
    # Roughly half the numbers are missing, so stripping 'null' also helps.
    if random.random() > 0.5:
        return random.randint(10000000, 99999999)
    return None


def to_array(o):
    # Positional encoding: a fixed attribute order replaces attribute names.
    return [o[ATTR1], o[ATTR2], o[ATTR3], o[ATTR4]]


objects = [mk_object(i) for i in range(10000)]

# Verbose form: a list of objects with full attribute names.
with open('objects.json', 'w') as f:
    json.dump(objects, f)

# Flattened and stripped: one long positional array, no spaces, no nulls.
obj_array = list(chain(*[to_array(o) for o in objects]))
jstr = json.dumps(obj_array).replace(' ', '').replace('null', '')
with open('obj_array.json', 'w') as f:
    f.write(jstr)
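This only works because the consumer knows the attribute order out of band. A
rough sketch of the reverse transform (hypothetical, not part of gen.py; note
that stripping 'null' leaves a file that is not strictly valid JSON, so the
nulls have to be re-inserted before parsing):

    import json
    import re

    with open('obj_array.json') as f:
        text = f.read()

    # Re-insert the stripped nulls: every empty slot sits between '[' or ','
    # on the left and ',' or ']' on the right.
    # (A nested list that held a single null collapsed to [] and cannot be
    # recovered; that information is lost by the stripping itself.)
    text = re.sub(r'(?<=[\[,])(?=[,\]])', 'null', text)
    flat = json.loads(text)

    # Regroup: every four consecutive values form one object, in the
    # ATTR1..ATTR4 order used by gen.py.
    decoded = []
    for i in range(0, len(flat), 4):
        name, age, numbers, phone = flat[i:i + 4]
        decoded.append({
            'some_very_long_name': name,
            'another_very_long_age': age,
            'we_also_got_some_qq_numbers': numbers,
            'and_the_users_phone': phone,
        })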
all: obj_array.json.gz objects.json.gz

%.json: gen.py
	python gen.py

%.json.gz: %.json
	gzip -c $< > $@
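To see where the size difference comes from, the two gzipped files can be
compared directly after running make; a small sketch (assumes both .gz files
are in the current directory):

    import os

    plain = os.path.getsize('objects.json.gz')
    minified = os.path.getsize('obj_array.json.gz')

    # Absolute sizes plus the relative saving of the minified variant.
    print('objects.json.gz:   %d bytes' % plain)
    print('obj_array.json.gz: %d bytes' % minified)
    print('saving: %.1f%%' % (100.0 * (plain - minified) / plain))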