Created
December 11, 2020 18:32
-
-
Save dpzmick/7658c0b0e934e87853025f9fc3227ce3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
N = 1000 | |
# make two scripts to pass to clickhouse client to demonstrate sadness with | |
# materialized columns | |
with open('create.sql', 'w') as f: | |
f.write("DROP DATABASE IF EXISTS test;") | |
f.write("CREATE DATABASE test;") | |
# --- fast table as a baseline | |
f.write(f"CREATE TABLE test.fast (") | |
f.write(f" keys Array(String) CODEC(ZSTD),") | |
f.write(f" values Array(String) CODEC(ZSTD),") | |
for i in range(0, N): | |
end = ")" if i==N-1 else "," | |
f.write(f" metric{i} UInt64 CODEC(DoubleDelta, ZSTD){end}") | |
f.write(f" ENGINE = MergeTree") | |
f.write(f" ORDER BY tuple();") | |
# --- very slow, uses arrayFirst() to index as it returns a default value | |
# we do not always know that all keys exist in the [keys] array | |
f.write(f"CREATE TABLE test.slow(") | |
f.write(f" keys Array(String) CODEC(ZSTD),") | |
f.write(f" values Array(String) CODEC(ZSTD),") | |
for i in range(0, N): | |
end = ")" if i==N-1 else "," | |
# we need to be able to tolerate missing data, so we've come up with: | |
f.write(f" metric{i} UInt64 MATERIALIZED toUInt64OrZero(arrayFirst((v,k) -> (k=='metric{i}'), values, keys)) CODEC(DoubleDelta, ZSTD){end}") | |
f.write(f" ENGINE = MergeTree") | |
f.write(f" ORDER BY tuple();") | |
def make_line(): | |
keys = [] | |
values = [] | |
for i in range(0, N): | |
keys.append(f'metric{i}') | |
values.append('123') | |
return (keys, values) | |
# fast insert precomputes all of the columns | |
with open('insert_fast.sql', 'w') as f: | |
(k, v) = make_line() | |
# format everything for the insert line | |
k = '[' + ','.join(map(lambda key: f"'{key}'", k)) + ']' | |
v = '[' + ','.join(map(lambda val: f"{val}", v)) + ']' | |
# "precomputed" values columums | |
cols = ','.join(map(lambda i: f'metric{i}', range(0, N))) | |
pre = ','.join(map(lambda _: '123', range(0, N))) | |
f.write(f'INSERT INTO test.fast (keys, values, {cols}) VALUES ') | |
f.write(f'({k}, {v}, {pre});') | |
with open('insert_slow.sql', 'w') as f: | |
(k, v) = make_line() | |
# format everything for the insert line | |
k = '[' + ','.join(map(lambda key: f"'{key}'", k)) + ']' | |
v = '[' + ','.join(map(lambda val: f"{val}", v)) + ']' | |
f.write(f'INSERT INTO test.slow (keys, values) VALUES ') | |
f.write(f'({k}, {v});') | |
import os | |
import time | |
setup_begin = time.time() | |
os.system('cat create.sql | clickhouse-client --multiquery') | |
setup_end = time.time() | |
print(f'setup {setup_end-setup_begin}s') | |
fast_begin = time.time(); | |
os.system('cat insert_fast.sql | clickhouse-client') | |
fast_end = time.time() | |
print(f'fast: {fast_end-fast_begin}s') | |
slow_begin = time.time() | |
os.system('cat insert_slow.sql | clickhouse-client') | |
slow_end = time.time() | |
print(f'slow: {slow_end-slow_begin}s') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment