Skip to content

Instantly share code, notes, and snippets.

@kylieCat
Created March 7, 2018 10:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kylieCat/3df7d896bc573d6fb5ec66641df74938 to your computer and use it in GitHub Desktop.
Save kylieCat/3df7d896bc573d6fb5ec66641df74938 to your computer and use it in GitHub Desktop.
"""
You'll need to install the C++ library libsnappy-dev and the Python bindings
for it to be able to use snappy compression
To install:
Linux: sudo apt-get install libsnappy-dev
OSX: brew install snappy
Windows: You're boned
Next you'll need the Python library for snappy:
$ python3 -m venv snappy
$ ./snappy/bin/pip install python-snappy
Then you can run this script with:
$ ./snappy/bin/python snappy_profiles.py
"""
import os
from pathlib import Path
import snappy
# Path of the JSON input file and of the Snappy-compressed output file.
IN_FILE_NAME = "./profiles.json"
OUT_FILE_NAME = "./profiles.snappy"
# Number of profile records written to the generated sample file.
# (Previously 10000000, i.e. ten million, which contradicted the name.)
ONE_MILLION = 1_000_000
# A single sample profile record; it is repeated to build the input file.
PROFILE = '{"name": "john doe","email": "john@email.com", "picture": "http://example.com/static/image.jpg"}'
def create_file():
    """Generate the sample input file unless it already exists.

    Writes ONE_MILLION back-to-back copies of PROFILE (with no separator
    between records) to the path named by IN_FILE_NAME.
    """
    # Nothing to do when the file is already on disk.
    if Path(IN_FILE_NAME).is_file():
        return
    with open(IN_FILE_NAME, "w") as sink:
        sink.writelines(PROFILE for _ in range(ONE_MILLION))
if __name__ == "__main__":
    # I don't have a JSON file with a million profiles in it
    # laying around, so I'm making one.
    # You can remove this call if you don't need it; just change
    # the value of IN_FILE_NAME to match your own file.
    create_file()

    # Uncompressed file size
    uncompressed = os.path.getsize(IN_FILE_NAME)

    # Read the JSON file as raw bytes. Binary mode skips text decoding and
    # newline translation, so exactly the bytes counted above are what gets
    # compressed, independent of the platform's default encoding.
    with open(IN_FILE_NAME, "rb") as file:
        raw_data = file.read()

    # Compress the data into a binary string.
    # You can print this string if you want, but
    # it will be a bunch of unintelligible garbage.
    binary_data = snappy.compress(raw_data)

    # Open a file to write to. Open it with "wb" for
    # "write, binary" since we have binary data.
    with open(OUT_FILE_NAME, "wb") as out:
        out.write(binary_data)

    compressed = os.path.getsize(OUT_FILE_NAME)

    # ">> 20" divides by 2**20, i.e. reports whole mebibytes (rounded down).
    print(f"Uncompressed file size: {uncompressed} bytes ({uncompressed >> 20}MB)")
    print(f"Compressed file size: {compressed} bytes ({compressed >> 20}MB)")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment