Skip to content

Instantly share code, notes, and snippets.

@oldcai
Last active March 30, 2024 13:11
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save oldcai/7230548 to your computer and use it in GitHub Desktop.
Save oldcai/7230548 to your computer and use it in GitHub Desktop.
zlib vs lz4 vs lzma vs zstd compression
import time
import requests
import zlib
#!pip install lz4 pylzma zstd
import lz4.block
import pylzma as lzma
import zstd
def measure_time_and_compress_decompress(compress_func, decompress_func, data, *args):
    """Compress and decompress *data* once, timing both operations.

    Parameters
    ----------
    compress_func : callable
        Invoked as ``compress_func(data, *args)``.
    decompress_func : callable
        Invoked as ``decompress_func(compressed_data)``.  Extra *args* are
        compression-only options (e.g. a level) and are NOT forwarded here.
    data : bytes
        Payload to compress.
    *args
        Optional extra positional arguments for the compressor only.

    Returns
    -------
    tuple
        ``(compressed_data, compress_seconds, decompress_seconds)``.
    """
    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # backwards (NTP) and has coarse resolution on some platforms.
    start = time.perf_counter()
    # f(data, *args) already degenerates to f(data) when args is empty,
    # so the original's `if args` conditional was redundant.
    compressed_data = compress_func(data, *args)
    compress_time = time.perf_counter() - start

    start = time.perf_counter()
    # Bug fix: the original forwarded *args to the decompressor too; a
    # compression level passed positionally would break most decompressors.
    decompress_func(compressed_data)
    decompress_time = time.perf_counter() - start

    return compressed_data, compress_time, decompress_time
def analyze_compression(name, compressed_data, original_len, elapsed_time, decompress_time):
    """Print size and throughput statistics for one codec run.

    Parameters
    ----------
    name : str
        Label printed as the section header (e.g. ``"Zlib"``).
    compressed_data : bytes
        The compressor's output; only its length is used.
    original_len : int
        Size of the uncompressed payload in bytes.
    elapsed_time, decompress_time : float
        Wall-clock seconds for compression and decompression.
    """
    compressed_len = len(compressed_data)
    ratio = compressed_len / original_len
    # Clamp the divisor: a very fast run on a coarse clock can report 0.0
    # seconds, which would raise ZeroDivisionError in the speed computation.
    mib = 1024 ** 2  # report throughput in MByte/s
    compress_speed = original_len / max(elapsed_time, 1e-12) / mib
    decompress_speed = original_len / max(decompress_time, 1e-12) / mib
    print(f"{name}:")
    print(f" Compressed len: {compressed_len}, Compression ratio: {ratio:.5f}")
    print(f" Compression time: {elapsed_time:.5f}s, Speed: {compress_speed:.2f} MByte/s")
    print(f" Decompression time: {decompress_time:.5f}s, Speed: {decompress_speed:.2f} MByte/s")
def analysis_ratios(source):
    """Run every configured codec over *source* and print its statistics."""
    total = len(source)
    print("Source len:", total)
    # Table of (label, compressor, decompressor), executed in display order.
    codecs = [
        ("Zlib", zlib.compress, zlib.decompress),
        ("LZ4", lz4.block.compress, lz4.block.decompress),
        ("LZMA", lzma.compress, lzma.decompress),
        ("Zstd level 3", lambda data: zstd.compress(data, 3), zstd.decompress),
        ("Zstd level 19", lambda data: zstd.compress(data, 19), zstd.decompress),
    ]
    for label, compress, decompress in codecs:
        blob, c_time, d_time = measure_time_and_compress_decompress(compress, decompress, source)
        analyze_compression(label, blob, total, c_time, d_time)
# --- Benchmark 1: a highly compressible synthetic payload ---
print("Testing with a million repeated characters:")
analysis_ratios(b"a" * 10**6)

print("-" * 50)
print("Testing in the real world:")
def get_website_content(url):
    """Fetch *url* and return the raw response body, or ``None`` on failure.

    A browser-like User-Agent is sent because some sites (e.g. 163.com)
    reject the default `requests` UA with an error page.
    """
    try:
        # Bug fix: requests has NO default timeout, so a stalled connection
        # would hang the benchmark forever. Bound both connect and read.
        response = requests.get(
            url,
            headers={"user-agent": "Mozilla/5.0"},
            timeout=10,
        )
        return response.content
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None
# --- Benchmark 2: a real web page (skipped when the fetch fails) ---
real_world_payload = get_website_content("https://www.163.com/")
if real_world_payload:
    analysis_ratios(real_world_payload)
Testing with a million repeated characters:
Source len: 1000000
Zlib:
Compressed len: 992, Compression ratio: 0.00099
Compression time: 0.00490s, Speed: 194.72 MByte/s
Decompression time: 0.00278s, Speed: 343.41 MByte/s
LZ4:
Compressed len: 3936, Compression ratio: 0.00394
Compression time: 0.00011s, Speed: 8565.31 MByte/s
Decompression time: 0.00059s, Speed: 1628.00 MByte/s
LZMA:
Compressed len: 221, Compression ratio: 0.00022
Compression time: 0.10568s, Speed: 9.02 MByte/s
Decompression time: 0.00239s, Speed: 398.60 MByte/s
Zstd level 1:
Compressed len: 51, Compression ratio: 0.00005
Compression time: 0.00116s, Speed: 821.52 MByte/s
Decompression time: 0.00007s, Speed: 13245.03 MByte/s
Zstd level 3:
Compressed len: 50, Compression ratio: 0.00005
Compression time: 0.00111s, Speed: 858.37 MByte/s
Decompression time: 0.00027s, Speed: 3530.45 MByte/s
Zstd level 19:
Compressed len: 49, Compression ratio: 0.00005
Compression time: 0.00621s, Speed: 153.63 MByte/s
Decompression time: 0.00013s, Speed: 7421.15 MByte/s
Zstd level 22:
Compressed len: 49, Compression ratio: 0.00005
Compression time: 0.00151s, Speed: 632.21 MByte/s
Decompression time: 0.00013s, Speed: 7220.22 MByte/s
--------------------------------------------------
Testing in the real world:
Source len: 555360
Zlib:
Compressed len: 90642, Compression ratio: 0.16321
Compression time: 0.00817s, Speed: 64.86 MByte/s
Decompression time: 0.00117s, Speed: 452.52 MByte/s
LZ4:
Compressed len: 125840, Compression ratio: 0.22659
Compression time: 0.00072s, Speed: 733.15 MByte/s
Decompression time: 0.00019s, Speed: 2735.76 MByte/s
LZMA:
Compressed len: 67894, Compression ratio: 0.12225
Compression time: 0.18124s, Speed: 2.92 MByte/s
Decompression time: 0.00393s, Speed: 134.92 MByte/s
Zstd level 1:
Compressed len: 95823, Compression ratio: 0.17254
Compression time: 0.00172s, Speed: 308.19 MByte/s
Decompression time: 0.00055s, Speed: 958.76 MByte/s
Zstd level 3:
Compressed len: 88492, Compression ratio: 0.15934
Compression time: 0.00218s, Speed: 243.02 MByte/s
Decompression time: 0.00055s, Speed: 960.42 MByte/s
Zstd level 19:
Compressed len: 73132, Compression ratio: 0.13168
Compression time: 0.17033s, Speed: 3.11 MByte/s
Decompression time: 0.00053s, Speed: 996.61 MByte/s
Zstd level 22:
Compressed len: 73149, Compression ratio: 0.13171
Compression time: 0.21461s, Speed: 2.47 MByte/s
Decompression time: 0.00053s, Speed: 990.39 MByte/s
@awoimbee
Copy link

Thanks!

@varenc
Copy link

varenc commented Jun 16, 2022

thanks for this!

163.com now returns a brief error when the default python requests user-agent is used, but you can fix that like this:

test_str_in_real_world = requests.get("https://www.163.com/",headers={"user-agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/536.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/535.36"}).content

@alavalas1
Copy link

How do I run this on Linux?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment