Skip to content

Instantly share code, notes, and snippets.

@den-crane
Created October 30, 2023 21:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save den-crane/f5df610711c0ef58b9f4453565f3c4f0 to your computer and use it in GitHub Desktop.
Save den-crane/f5df610711c0ef58b9f4453565f3c4f0 to your computer and use it in GitHub Desktop.
wierd_compression
-- Test table: four key columns, each declared with its own type/codec
-- combination, plus a column `r` that is part of the sorting key.
CREATE TABLE t
(
    key_zstd1    Int64 CODEC(ZSTD(1)),       -- Int64, ZSTD level 1
    key_lz4      Int64 CODEC(LZ4),           -- Int64, LZ4
    key_t64_zstd Int64 CODEC(T64, ZSTD(1)),  -- Int64, T64 followed by ZSTD level 1
    key_int8     Int8  CODEC(ZSTD(1)),       -- Int8, ZSTD level 1
    r            Int32                       -- no explicit codec declared
)
ENGINE = MergeTree
ORDER BY (key_zstd1, r);
-- Load 100,000,000 generated rows. The same value `key` is written to all four
-- key_* columns; `r` receives number % 3333.
-- The target columns are listed explicitly so the statement does not depend on
-- the table's column order; SELECT items are still matched by position.
-- NOTE(review): ClickHouse arrays are 1-based, while number % 5 yields 0..4.
-- Index 0 appears to fall back to the default value (0), which would mean the
-- last element (-3) is never selected -- confirm against the arrayElement docs.
-- The expression is intentionally left unchanged so the generated data stays
-- comparable with the sizes recorded in the result table below.
INSERT INTO t (key_zstd1, key_lz4, key_t64_zstd, key_int8, r)
SELECT
    [0, -1, -4, 6, -3][number % 5] AS key,
    key,
    key,
    key,
    number % 3333
FROM numbers(100000000);
-- Request an unscheduled merge of the table's parts; FINAL asks for the merge
-- even if the data is already in a single part.
OPTIMIZE TABLE t FINAL;
-- Report compressed vs. uncompressed size per key_* column of table `t`,
-- summed over active parts, largest compressed size first.
SELECT
    column,
    -- The inline aliases `size` / `usize` keep the raw byte counts available:
    -- ORDER BY sorts on the numeric `size`, not on the formatted string.
    formatReadableSize(sum(column_data_compressed_bytes) AS size) AS compressed,
    formatReadableSize(sum(column_data_uncompressed_bytes) AS usize) AS uncompressed
FROM system.parts_columns
WHERE active = 1
    -- Restrict to the current database: the previous `database LIKE '%'`
    -- matched every database, so a same-named table `t` elsewhere would have
    -- been summed into these figures.
    AND database = currentDatabase()
    AND table = 't'
    AND column LIKE 'key%'
GROUP BY column
ORDER BY size DESC;
┌─column───────┬─compressed─┬─uncompressed─┐
│ key_zstd1 │ 10.14 MiB │ 762.94 MiB │
│ key_lz4 │ 3.42 MiB │ 762.94 MiB │ LZ4 compresses this column better than ZSTD(1) does (3.42 MiB vs. 10.14 MiB)
│ key_t64_zstd │ 758.32 KiB │ 762.94 MiB │
│ key_int8 │ 67.07 KiB │ 95.37 MiB │ the same data stored as Int8 with ZSTD(1) is ~154 times smaller than key_zstd1 (Int64 with ZSTD(1))
└──────────────┴────────────┴──────────────┘
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment