Skip to content

Instantly share code, notes, and snippets.

@dpzmick
Created December 11, 2020 18:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dpzmick/7658c0b0e934e87853025f9fc3227ce3 to your computer and use it in GitHub Desktop.
Save dpzmick/7658c0b0e934e87853025f9fc3227ce3 to your computer and use it in GitHub Desktop.
N = 1000
# make two scripts to pass to clickhouse client to demonstrate sadness with
# materialized columns
with open('create.sql', 'w') as f:
f.write("DROP DATABASE IF EXISTS test;")
f.write("CREATE DATABASE test;")
# --- fast table as a baseline
f.write(f"CREATE TABLE test.fast (")
f.write(f" keys Array(String) CODEC(ZSTD),")
f.write(f" values Array(String) CODEC(ZSTD),")
for i in range(0, N):
end = ")" if i==N-1 else ","
f.write(f" metric{i} UInt64 CODEC(DoubleDelta, ZSTD){end}")
f.write(f" ENGINE = MergeTree")
f.write(f" ORDER BY tuple();")
# --- very slow, uses arrayFirst() to index as it returns a default value
# we do not always know that all keys exist in the [keys] array
f.write(f"CREATE TABLE test.slow(")
f.write(f" keys Array(String) CODEC(ZSTD),")
f.write(f" values Array(String) CODEC(ZSTD),")
for i in range(0, N):
end = ")" if i==N-1 else ","
# we need to be able to tolerate missing data, so we've come up with:
f.write(f" metric{i} UInt64 MATERIALIZED toUInt64OrZero(arrayFirst((v,k) -> (k=='metric{i}'), values, keys)) CODEC(DoubleDelta, ZSTD){end}")
f.write(f" ENGINE = MergeTree")
f.write(f" ORDER BY tuple();")
def make_line():
keys = []
values = []
for i in range(0, N):
keys.append(f'metric{i}')
values.append('123')
return (keys, values)
# fast insert precomputes all of the columns
with open('insert_fast.sql', 'w') as f:
(k, v) = make_line()
# format everything for the insert line
k = '[' + ','.join(map(lambda key: f"'{key}'", k)) + ']'
v = '[' + ','.join(map(lambda val: f"{val}", v)) + ']'
# "precomputed" values columums
cols = ','.join(map(lambda i: f'metric{i}', range(0, N)))
pre = ','.join(map(lambda _: '123', range(0, N)))
f.write(f'INSERT INTO test.fast (keys, values, {cols}) VALUES ')
f.write(f'({k}, {v}, {pre});')
with open('insert_slow.sql', 'w') as f:
(k, v) = make_line()
# format everything for the insert line
k = '[' + ','.join(map(lambda key: f"'{key}'", k)) + ']'
v = '[' + ','.join(map(lambda val: f"{val}", v)) + ']'
f.write(f'INSERT INTO test.slow (keys, values) VALUES ')
f.write(f'({k}, {v});')
import os
import time
setup_begin = time.time()
os.system('cat create.sql | clickhouse-client --multiquery')
setup_end = time.time()
print(f'setup {setup_end-setup_begin}s')
fast_begin = time.time();
os.system('cat insert_fast.sql | clickhouse-client')
fast_end = time.time()
print(f'fast: {fast_end-fast_begin}s')
slow_begin = time.time()
os.system('cat insert_slow.sql | clickhouse-client')
slow_end = time.time()
print(f'slow: {slow_end-slow_begin}s')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment