Skip to content

Instantly share code, notes, and snippets.

@den-crane
Last active January 20, 2023 21:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save den-crane/64ca99b15ca4ea123c137352a306acbd to your computer and use it in GitHub Desktop.
Save den-crane/64ca99b15ca4ea123c137352a306acbd to your computer and use it in GitHub Desktop.
UniqueMergeTree vs ReplacingMergeTree

Test1

UniqueMergeTree

set optimize_on_insert=0;
CREATE TABLE test_unique_mergetree(n1 UInt32,n2 UInt32,s String) ENGINE= UniqueMergeTree ORDER BY n1;

INSERT INTO test_unique_mergetree SELECT number, 1, 'hello' FROM numbers(1e8);
0 rows in set. Elapsed: 65.947 sec. Processed 100.65 million rows, 805.21 MB (1.53 million rows/s., 12.21 MB/s.)

SELECT * FROM test_unique_mergetree ORDER BY n1 limit 10;
10 rows in set. Elapsed: 0.002 sec. Processed 65.54 thousand rows, 1.44 MB (26.33 million rows/s., 579.27 MB/s.)

INSERT INTO test_unique_mergetree SELECT number, 2, 'world' FROM numbers(1e8);
0 rows in set. Elapsed: 243.617 sec. Processed 201.97 million rows, 2.02 GB (829.04 thousand rows/s., 8.30 MB/s.)

SELECT * FROM test_unique_mergetree ORDER BY n1 limit 10;
10 rows in set. Elapsed: 0.002 sec. Processed 8.19 thousand rows, 180.22 KB (3.61 million rows/s., 79.51 MB/s.)

SELECT sum(n2) FROM test_unique_mergetree;
┌───sum(n2)─┐
│ 200000000 │
└───────────┘
1 row in set. Elapsed: 0.021 sec. Processed 100.00 million rows, 400.00 MB (4.74 billion rows/s., 18.97 GB/s.)

SELECT n1%10 g, sum(n2), any(s) FROM test_unique_mergetree where n2=2 group by g;
10 rows in set. Elapsed: 0.160 sec. Processed 100.00 million rows, 2.20 GB (623.17 million rows/s., 13.71 GB/s.)

ReplacingMergeTree

CREATE TABLE test_replacing_mergetree(n1 UInt32,n2 UInt32,s String) ENGINE= ReplacingMergeTree ORDER BY n1;

INSERT INTO test_replacing_mergetree SELECT number, 1, 'hello' FROM numbers(1e8);
0 rows in set. Elapsed: 1.962 sec. Processed 100.65 million rows, 805.21 MB (51.30 million rows/s., 410.44 MB/s.)

SELECT * FROM test_replacing_mergetree final ORDER BY n1 limit 10;
10 rows in set. Elapsed: 0.035 sec. Processed 3.25 million rows, 71.55 MB (91.78 million rows/s., 2.02 GB/s.)

INSERT INTO test_replacing_mergetree SELECT number, 2, 'world' FROM numbers(1e8);
0 rows in set. Elapsed: 2.011 sec. Processed 100.65 million rows, 805.21 MB (50.05 million rows/s., 400.41 MB/s.)

SELECT * FROM test_replacing_mergetree final ORDER BY n1 limit 10;
10 rows in set. Elapsed: 0.032 sec. Processed 4.03 million rows, 88.67 MB (126.55 million rows/s., 2.78 GB/s.)

SELECT sum(n2) FROM test_replacing_mergetree final;
┌───sum(n2)─┐
│ 200000000 │
└───────────┘
1 row in set. Elapsed: 0.513 sec. Processed 128.89 million rows, 1.03 GB (251.40 million rows/s., 2.01 GB/s.)

SELECT n1%10 g, sum(n2), any(s) FROM test_replacing_mergetree final where n2=2 group by g;
10 rows in set. Elapsed: 0.749 sec. Processed 128.89 million rows, 2.84 GB (172.07 million rows/s., 3.79 GB/s.)

Test2

set optimize_on_insert=0, do_not_merge_across_partitions_select_final=1;

CREATE TABLE bench_r(c_int Int64, c_str varchar(255), c_float Float64) 
Engine=ReplacingMergeTree partition by c_int%10 order by c_int as
select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers_mt(100000000);

0 rows in set. Elapsed: 32.725 sec. Processed 100.65 million rows, 805.21 MB (3.08 million rows/s., 24.61 MB/s.)


CREATE TABLE bench_u(c_int Int64, c_str varchar(255), c_float Float64)
Engine=UniqueMergeTree partition by c_int%10 order by c_int as
select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers_mt(100000000);

0 rows in set. Elapsed: 3984.033 sec. Processed 4.88 billion rows, 77.30 GB (1.23 million rows/s., 19.40 MB/s.)
select avg(c_float) from bench_r final;

1 row in set. Elapsed: 0.877 sec. Processed 100.00 million rows, 1.60 GB (114.08 million rows/s., 1.83 GB/s.)


select avg(c_float) from bench_u;

1 row in set. Elapsed: 0.161 sec. Processed 100.00 million rows, 800.00 MB (622.95 million rows/s., 4.98 GB/s.)
select sum(c_int) from bench_r final;

1 row in set. Elapsed: 0.625 sec. Processed 100.00 million rows, 800.00 MB (160.12 million rows/s., 1.28 GB/s.)


select sum(c_int) from bench_u;

1 row in set. Elapsed: 0.148 sec. Processed 100.00 million rows, 800.00 MB (674.65 million rows/s., 5.40 GB/s.)
select max(c_int), max(c_str), max(c_float) from bench_r final;

1 row in set. Elapsed: 2.479 sec. Processed 100.00 million rows, 5.70 GB (40.33 million rows/s., 2.30 GB/s.)


select max(c_int), max(c_str), max(c_float) from bench_u;

1 row in set. Elapsed: 0.813 sec. Processed 100.00 million rows, 5.70 GB (123.01 million rows/s., 7.01 GB/s.)
select c_str from bench_r final group by c_str order by c_str limit 1;

1 row in set. Elapsed: 11.336 sec. Processed 100.00 million rows, 4.90 GB (8.82 million rows/s., 432.24 MB/s.)


select c_str from bench_u group by c_str order by c_str limit 1;

1 row in set. Elapsed: 10.334 sec. Processed 100.00 million rows, 4.10 GB (9.68 million rows/s., 396.76 MB/s.)
select c_str from bench_r final group by c_str limit 1;

1 row in set. Elapsed: 6.162 sec. Processed 100.00 million rows, 4.90 GB (16.23 million rows/s., 795.21 MB/s.)


select c_str from bench_u group by c_str limit 1;

1 row in set. Elapsed: 4.394 sec. Processed 100.00 million rows, 4.10 GB (22.76 million rows/s., 933.07 MB/s.)
select c_int from bench_r final group by c_int order by c_int limit 1;

1 row in set. Elapsed: 4.383 sec. Processed 100.00 million rows, 800.00 MB (22.81 million rows/s., 182.50 MB/s.)


select c_int from bench_u group by c_int order by c_int limit 1;

1 row in set. Elapsed: 3.880 sec. Processed 100.00 million rows, 800.00 MB (25.78 million rows/s., 206.21 MB/s.)
select c_str from bench_r final order by c_str limit 1;

1 row in set. Elapsed: 2.079 sec. Processed 100.00 million rows, 4.90 GB (48.10 million rows/s., 2.36 GB/s.)


select c_str from bench_u order by c_str limit 1;

1 row in set. Elapsed: 0.449 sec. Processed 100.00 million rows, 4.10 GB (222.54 million rows/s., 9.12 GB/s.)
select count() from bench_r final where c_str like '%00000%';

1 row in set. Elapsed: 2.075 sec. Processed 100.00 million rows, 4.90 GB (48.19 million rows/s., 2.36 GB/s.)


select count() from bench_u where c_str like '%00000%';

1 row in set. Elapsed: 0.483 sec. Processed 100.00 million rows, 4.10 GB (206.93 million rows/s., 8.48 GB/s.)
select c_str from bench_r final where c_float <= 1000 order by c_int limit 1;

1 row in set. Elapsed: 0.051 sec. Processed 2.33 million rows, 132.85 MB (45.38 million rows/s., 2.59 GB/s.)


select c_str from bench_u where c_float <= 1000 order by c_int limit 1;

1 row in set. Elapsed: 0.036 sec. Processed 1.00 million rows, 57.20 MB (28.07 million rows/s., 1.60 GB/s.)

Test 3

clickhouse-benchmark -c 2 -i 1000 <<< 'insert into bench_r select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers(1000)'

localhost:9000, queries 1000, QPS: 173.894, RPS: 173893.525, MiB/s: 1.327, result RPS: 0.000, result MiB/s: 0.000.


clickhouse-benchmark -c 2 -i 1000 <<< 'insert into bench_u select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers(1000)'
Loaded 1 queries.

Queries executed: 0.


Queries executed: 0.


Queries executed: 0.


Queries executed: 0.


Queries executed: 0.


Queries executed: 0.


Queries executed: 0.


Queries executed: 0.

An error occurred while processing the query 'insert into bench_u select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers(1000)': Code: 49. DB::Exception: Received from localhost:9000. DB::Exception: Can not find part info in part_info_by_block_number, this is a bug, min_block: 1830. Stack trace:

0. DB::Exception::Exception(DB::Exception::MessageMasked const&, int, bool) @ 0xebe705a in /usr/bin/clickhouse
1. ? @ 0xebf268e in /usr/bin/clickhouse
2. DB::StorageUniqueMergeTree::findPartInfoByMinBlock(long) const @ 0x153e3c78 in /usr/bin/clickhouse
3. DB::UniqueMergeTreeSink::updateDeleteBitmapAndTableVersion(std::__1::shared_ptr<DB::IMergeTreeDataPart>&, DB::MergeTreePartInfo const&, std::__1::unordered_map<long, std::__1::vector<unsigned int, std::__1::allocator<unsigned int>>, std::__1::hash<long>, std::__1::equal_to<long>, std::__1::allocator<std::__1::pair<long const, std::__1::vector<unsigned int, std::__1::allocator<unsigned int>>>>>&, std::__1::unordered_map<long, std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>>, std::__1::hash<long>, std::__1::equal_to<long>, std::__1::allocator<std::__1::pair<long const, std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>>>>> const&) @ 0x157740c3 in /usr/bin/clickhouse
4. DB::UniqueMergeTreeSink::finishDelayedChunk() @ 0x15776493 in /usr/bin/clickhouse
5. DB::UniqueMergeTreeSink::consume(DB::Chunk) @ 0x1577889e in /usr/bin/clickhouse
6. DB::SinkToStorage::onConsume(DB::Chunk) @ 0x15c02642 in /usr/bin/clickhouse
7. ? @ 0x15b51c4b in /usr/bin/clickhouse
8. ? @ 0x15b51999 in /usr/bin/clickhouse
9. DB::ExceptionKeepingTransform::work() @ 0x15b5127f in /usr/bin/clickhouse
10. DB::ExecutionThreadContext::executeTask() @ 0x15969a46 in /usr/bin/clickhouse
11. DB::PipelineExecutor::executeStepImpl(unsigned long, std::__1::atomic<bool>*) @ 0x1595ec3c in /usr/bin/clickhouse
12. DB::PipelineExecutor::executeImpl(unsigned long) @ 0x1595d679 in /usr/bin/clickhouse
13. DB::PipelineExecutor::execute(unsigned long) @ 0x1595d47d in /usr/bin/clickhouse
14. ? @ 0x1595c612 in /usr/bin/clickhouse
15. ThreadPoolImpl<std::__1::thread>::worker(std::__1::__list_iterator<std::__1::thread, void*>) @ 0xecb0016 in /usr/bin/clickhouse
16. ? @ 0xecb51e1 in /usr/bin/clickhouse
17. start_thread @ 0x7fa3 in /lib/x86_64-linux-gnu/libpthread-2.28.so
18. clone @ 0xf94cf in /lib/x86_64-linux-gnu/libc-2.28.so
. (LOGICAL_ERROR) (version 22.13.1.1027)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment