Plain Table VS Table with Projection VS Table with GIN
try to insert 100 000 000 rows fails with OOM
set allow_experimental_inverted_index=1;
CREATETABLEbench(c_int Int64, c_str varchar(255), c_float Float64)
Engine=ReplacingMergeTree partition by c_int%10order by c_int asselect toInt64(cityHash64(number)),
lower(hex(MD5(toString(number)))),
number/cityHash64(number)*10000000from numbers_mt(100000000);
0 rows inset. Elapsed: 36.980 sec. Processed 100.65 million rows, 805.21 MB (2.72 million rows/s., 21.77 MB/s.)
CREATETABLEbench_p(c_int Int64, c_str varchar(255), c_float Float64,
projection p1 (select c_int, c_str order by c_str))
Engine=ReplacingMergeTree partition by c_int%10order by c_int asselect toInt64(cityHash64(number)),
lower(hex(MD5(toString(number)))),
number/cityHash64(number)*10000000from numbers_mt(100000000);
0 rows inset. Elapsed: 64.473 sec. Processed 200.65 million rows, 6.51 GB (3.11 million rows/s., 100.90 MB/s.)
CREATETABLEbench_i(c_int Int64, c_str varchar(255), c_float Float64,
INDEX af (c_str) TYPE inverted() GRANULARITY 1)
Engine=ReplacingMergeTree partition by c_int%10order by c_int asselect toInt64(cityHash64(number)),
lower(hex(MD5(toString(number)))),
number/cityHash64(number)*10000000from numbers_mt(100000000);
0 rows inset. Elapsed: 594.427 sec. Processed 24.11 million rows, 192.91 MB (40.57 thousand rows/s., 324.54 KB/s.)
Received exception from server (version 22.13.1):
Code: 241. DB::Exception: Received from localhost:9000. DB::Exception:
Memory limit (total) exceeded: would use 56.57 GiB (attempt to allocate chunk of 5094480 bytes), maximum: 56.53 GiB.
OvercommitTracker decision: Query was selected to stop by OvercommitTracker.. (MEMORY_LIMIT_EXCEEDED)
DROPTABLE bench
0 rows inset. Elapsed: 1.718 sec.
DROPTABLE bench_p
0 rows inset. Elapsed: 0.001 sec.
DROPTABLE bench_i
Error on processing query: Code: 32. DB::Exception: Attempt to read after eof: while receiving packet from localhost:9000. (ATTEMPT_TO_READ_AFTER_EOF) (version 22.13.1.2671)
Connecting to localhost:9000as user default.
Code: 210. DB::NetException: Connection refused (localhost:9000). (NETWORK_ERROR)
seems OOM
/etc/init.d/clickhouse-server restart
seems OOM again
rm /var/lib/clickhouse/metadata/default/bench_i.sql
try to insert 10 000 000 rows: insert is slower 64 times, Clickhouse used 25 GB RAM.
set allow_experimental_inverted_index=1;
CREATETABLEbench(c_int Int64, c_str varchar(255), c_float Float64)
Engine=ReplacingMergeTree partition by c_int%10order by c_int asselect toInt64(cityHash64(number)),
lower(hex(MD5(toString(number)))),
number/cityHash64(number)*10000000from numbers_mt(10000000);
0 rows inset. Elapsed: 3.539 sec. Processed 10.48 million rows, 83.88 MB (2.96 million rows/s., 23.70 MB/s.)
CREATETABLEbench_p(c_int Int64, c_str varchar(255), c_float Float64,
projection p1 (select c_int, c_str order by c_str))
Engine=ReplacingMergeTree partition by c_int%10order by c_int asselect toInt64(cityHash64(number)),
lower(hex(MD5(toString(number)))),
number/cityHash64(number)*10000000from numbers_mt(10000000);
0 rows inset. Elapsed: 5.912 sec. Processed 20.48 million rows, 653.88 MB (3.46 million rows/s., 110.60 MB/s.)
CREATETABLEbench_i(c_int Int64, c_str varchar(255), c_float Float64,
INDEX af (c_str) TYPE inverted() GRANULARITY 1
)
Engine=ReplacingMergeTree partition by c_int%10order by c_int asselect toInt64(cityHash64(number)),
lower(hex(MD5(toString(number)))),
number/cityHash64(number)*10000000from numbers_mt(10000000);
0 rows inset. Elapsed: 226.779 sec. Processed 10.48 million rows, 83.88 MB (46.23 thousand rows/s., 369.86 KB/s.)
OK
select*from bench where c_str =lower(hex(MD5(toString(999999))));
┌───────────────c_int─┬─c_str────────────────────────────┬──────────────────c_float─┐
│ 3311836950280227934 │ 52c69e3a57331081823331c4e69d3f2e │ 0.0000030194693006109073 │
└─────────────────────┴──────────────────────────────────┴──────────────────────────┘
1 row inset. Elapsed: 0.074 sec. Processed 10.00 million rows, 410.10 MB (135.31 million rows/s., 5.55 GB/s.)
select*from bench_p where c_int in (
select c_int from bench_p where c_str =lower(hex(MD5(toString(999999)))))
and c_str =lower(hex(MD5(toString(999999))));
┌───────────────c_int─┬─c_str────────────────────────────┬──────────────────c_float─┐
│ 3311836950280227934 │ 52c69e3a57331081823331c4e69d3f2e │ 0.0000030194693006109073 │
└─────────────────────┴──────────────────────────────────┴──────────────────────────┘
1 row inset. Elapsed: 0.021 sec. Processed 1.19 million rows, 48.86 MB (56.97 million rows/s., 2.34 GB/s.)
select*from bench_i where c_str =lower(hex(MD5(toString(999999))));
┌───────────────c_int─┬─c_str────────────────────────────┬──────────────────c_float─┐
│ 3311836950280227934 │ 52c69e3a57331081823331c4e69d3f2e │ 0.0000030194693006109073 │
└─────────────────────┴──────────────────────────────────┴──────────────────────────┘
1 row inset. Elapsed: 0.004 sec. Processed 8.19 thousand rows, 466.94 KB (2.03 million rows/s., 115.98 MB/s.)
Fail
selectcount() from bench where c_str <lower(hex(MD5(toString(999999))));
1 row inset. Elapsed: 0.074 sec. Processed 10.00 million rows, 410.10 MB (135.31 million rows/s., 5.55 GB/s.)
selectcount() from bench_p where c_int in (
select c_int from bench_p where c_str <lower(hex(MD5(toString(999999)))))
and c_str <lower(hex(MD5(toString(999999))));
1 row inset. Elapsed: 0.705 sec. Processed 7.78 million rows, 381.34 MB (11.03 million rows/s., 540.55 MB/s.)
selectcount() from bench_i where c_str <lower(hex(MD5(toString(999999))));
┌─count()─┐
│ 3233584 │
└─────────┘
1 row inset. Elapsed: 0.075 sec. Processed 10.00 million rows, 410.00 MB (133.77 million rows/s., 5.48 GB/s.)
OK
selectcount() from bench where c_str in ('52c69e3a57331081823331c4e69d3f2e' , 'A', 'B', 'C', 'D', 'E', 'F');
┌─count()─┐
│ 1 │
└─────────┘
1 row inset. Elapsed: 0.065 sec. Processed 10.00 million rows, 410.05 MB (154.13 million rows/s., 6.32 GB/s.)
selectcount() from bench_p where c_int in (
select c_int from bench_p where c_str in ('52c69e3a57331081823331c4e69d3f2e' , 'A', 'B', 'C', 'D', 'E', 'F'))
and c_str in ('52c69e3a57331081823331c4e69d3f2e' , 'A', 'B', 'C', 'D', 'E', 'F');
┌─count()─┐
│ 1 │
└─────────┘
1 row inset. Elapsed: 0.029 sec. Processed 2.03 million rows, 83.39 MB (69.81 million rows/s., 2.87 GB/s.)
selectcount() from bench_i where c_str in ('52c69e3a57331081823331c4e69d3f2e' , 'A', 'B', 'C', 'D', 'E', 'F');
┌─count()─┐
│ 1 │
└─────────┘
1 row inset. Elapsed: 0.006 sec. Processed 8.19 thousand rows, 335.87 KB (1.35 million rows/s., 55.48 MB/s.)
OK
selectcount() from bench where c_str ='52c69e3a57331081823331c4e69d3f2e'or c_str='A'or c_str='B';
┌─count()─┐
│ 1 │
└─────────┘
1 row inset. Elapsed: 0.061 sec. Processed 10.00 million rows, 410.05 MB (165.13 million rows/s., 6.77 GB/s.)
selectcount() from bench_p where c_int in (
select c_int from bench_p where c_str ='52c69e3a57331081823331c4e69d3f2e'or c_str='A'or c_str='B')
and c_str ='52c69e3a57331081823331c4e69d3f2e'or c_str='A'or c_str='B';
┌─count()─┐
│ 1 │
└─────────┘
1 row inset. Elapsed: 0.047 sec. Processed 3.24 million rows, 146.01 MB (68.76 million rows/s., 3.09 GB/s.)
selectcount() from bench_i where c_str ='52c69e3a57331081823331c4e69d3f2e'or c_str='A'or c_str='B';
┌─count()─┐
│ 1 │
└─────────┘
1 row inset. Elapsed: 0.004 sec. Processed 8.19 thousand rows, 335.87 KB (1.93 million rows/s., 79.20 MB/s.)
Fail / index is not uses if there is no value in the index ???? Or it's about spaces/empty string?
selectcount() from bench_i where c_str ='';
┌─count()─┐
│ 0 │
└─────────┘
1 row inset. Elapsed: 0.078 sec. Processed 10.00 million rows, 410.00 MB (127.70 million rows/s., 5.24 GB/s.)
selectcount() from bench_i where c_str ='';
┌─count()─┐
│ 0 │
└─────────┘
1 row inset. Elapsed: 0.061 sec. Processed 10.00 million rows, 410.09 MB (164.80 million rows/s., 6.76 GB/s.)
selectcount() from bench_i where c_str ='A';
┌─count()─┐
│ 0 │
└─────────┘
1 row inset. Elapsed: 0.004 sec.
QPS worse 40 times
clickhouse-benchmark -c 3 -i 1000 <<< 'insert into bench select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers(1000)'
localhost:9000, queries 1000, QPS: 303.071, RPS: 303070.946, MiB/s: 2.312, result RPS: 0.000, result MiB/s: 0.000.
clickhouse-benchmark -c 3 -i 1000 <<< 'insert into bench_p select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers(1000)'
localhost:9000, queries 1000, QPS: 181.182, RPS: 362363.706, MiB/s: 11.231, result RPS: 0.000, result MiB/s: 0.000.
clickhouse-benchmark -c 3 -i 1000 <<< 'insert into bench_i select toInt64(cityHash64(number)), lower(hex(MD5(toString(number)))), number/cityHash64(number)*10000000 from numbers(1000)'
localhost:9000, queries 513, QPS: 8.482, RPS: 8481.658, MiB/s: 0.065, result RPS: 0.000, result MiB/s: 0.000.
optimize
optimize table bench final;
0 rows inset. Elapsed: 2.451 sec.
optimize table bench_p final;
0 rows inset. Elapsed: 2.909 sec.
optimize table bench_i final;
0 rows inset. Elapsed: 256.493 sec.
Fail / index is not uses if there is no value in the index ???? Or it's about spaces/empty string?
All granules are skipped when search term is not found in inverted index. That's why it doesn't say how many rows are processed.