Skip to content

Instantly share code, notes, and snippets.

@Anexen
Created December 26, 2019 18:01
Show Gist options
  • Save Anexen/ceaabc2346c575859ea0d71ffc2b53ef to your computer and use it in GitHub Desktop.
Save Anexen/ceaabc2346c575859ea0d71ffc2b53ef to your computer and use it in GitHub Desktop.
Benchmark: inserting row-oriented vs. columnar data with clickhouse-driver
import sys
import time
import resource
from clickhouse_driver import Client
def memory_usage():
    """Return the peak resident set size of the current process.

    The value is ``ru_maxrss`` as reported by ``getrusage(RUSAGE_SELF)``.
    It is a high-water mark, so it never decreases during the life of the
    process. Units are platform-dependent (kilobytes on Linux, bytes on
    macOS), so only compare numbers taken on the same machine.
    """
    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
def generate_data(n):
    """Build ``n`` row tuples of the form ``(i, str(i), i * 1.1)``.

    Each tuple is one row; the three fields line up with the ``a``, ``b``
    and ``c`` columns used by the INSERT statement in this script.
    """
    return [(i, str(i), i * 1.1) for i in range(n)]
def generate_columnar_data(n):
    """Build the same values as ``generate_data`` but laid out by column.

    Returns a list of three tuples, each of length ``n``: the integers
    ``0..n-1``, their string forms, and each integer multiplied by 1.1.
    """
    return [
        tuple(range(n)),
        tuple(map(str, range(n))),
        tuple(i * 1.1 for i in range(n)),
    ]
def transpose(data):
    """Turn a sequence of columns into a list of row tuples.

    Columns are zipped together, so the result is truncated to the length
    of the shortest column; an empty ``data`` yields an empty list.
    """
    return list(map(tuple, zip(*data)))
def test(client, data_gen, columnar):
    """Run one insert benchmark and print memory and timing figures.

    Args:
        client: object whose ``execute(query, data, columnar=...)`` method
            performs the insert (a ``clickhouse_driver.Client`` here).
        data_gen: zero-argument callable that builds the data to insert.
            It is called inside this function so that the memory taken by
            the generated data shows up between the first two readings.
        columnar: forwarded unchanged to ``client.execute``.

    Only the ``client.execute`` call is timed; data generation is excluded.
    Memory figures come from ``memory_usage()``.
    """
    print('Mem at start: {:>10}'.format(memory_usage()))
    data = data_gen()
    print('Mem with data: {:>10}'.format(memory_usage()))

    # perf_counter() is monotonic and high-resolution; time.time() follows
    # the wall clock and can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    client.execute(
        "INSERT INTO test (a, b, c) VALUES",
        data,
        columnar=columnar,
    )
    end = time.perf_counter()

    print('Mem after insert: {:>10}'.format(memory_usage()))
    print('Insertion time: {:>10}'.format(round(end - start, 8)))
if __name__ == '__main__':
    # Number of rows inserted by every scenario.
    n = 2000000

    # Scenario number -> (data generator, value for the `columnar` flag).
    #   1: rows built directly
    #   2: columns built first, then transposed into rows
    #   3: columns passed to the driver as-is
    scenarios = {
        1: (lambda: generate_data(n), False),
        2: (lambda: transpose(generate_columnar_data(n)), False),
        3: (lambda: generate_columnar_data(n), True),
    }

    # Validate the argument before touching the server, so a bad invocation
    # fails fast with a usage message instead of creating and dropping the
    # table without running anything.
    try:
        test_num = int(sys.argv[-1])
    except ValueError:
        test_num = None
    if test_num not in scenarios:
        sys.exit('usage: {} <1|2|3>'.format(sys.argv[0]))

    client = Client(host='localhost')
    # NOTE: if an earlier run was killed before cleanup, the CREATE below
    # fails because the table still exists; drop it manually first with
    # client.execute('DROP TABLE test').
    client.execute(
        'CREATE TABLE test (a UInt32, b String, c Float32) ENGINE Memory()'
    )
    try:
        data_gen, columnar = scenarios[test_num]
        test(client, data_gen, columnar=columnar)
    finally:
        # Always remove the benchmark table, even when the insert fails.
        client.execute('DROP TABLE test')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment