Created
October 16, 2020 17:28
-
-
Save den-crane/42bf77285b53762f11d88d73a46179de to your computer and use it in GitHub Desktop.
SAMPLING
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
https://stackoverflow.com/questions/64369622/using-sample-in-clickhouse-seems-to-read-all-rows-and-more-bytes-is-this-expect | |
CREATE TABLE table_one | |
( timestamp UInt64, | |
transaction_id UInt64, | |
banner_id UInt16, | |
value UInt32 | |
) | |
ENGINE = MergeTree() | |
PARTITION BY toYYYYMMDD(toDateTime(timestamp)) | |
ORDER BY (banner_id, toStartOfHour(toDateTime(timestamp)), cityHash64(transaction_id)) | |
SAMPLE BY cityHash64(transaction_id) | |
SETTINGS index_granularity = 8192 | |
insert into table_one select 1602809234+intDiv(number,100000), number, number%991, toUInt32(rand()) | |
from numbers(10000000000); | |
select banner_id, sum(value), count(value), max(value) | |
from table_one | |
group by banner_id format Null; | |
0 rows in set. Elapsed: 11.490 sec. Processed 10.00 billion rows, 60.00 GB (870.30 million rows/s., 5.22 GB/s.) | |
select banner_id, sum(value), count(value), max(value) | |
from table_one SAMPLE 0.01 | |
group by banner_id format Null; | |
0 rows in set. Elapsed: 1.316 sec. Processed 452.67 million rows, 6.34 GB (343.85 million rows/s., 4.81 GB/s.) | |
select banner_id, sum(value), count(value), max(value) | |
from table_one | |
WHERE banner_id = 42 | |
group by banner_id format Null; | |
0 rows in set. Elapsed: 0.020 sec. Processed 10.30 million rows, 61.78 MB (514.37 million rows/s., 3.09 GB/s.) | |
select banner_id, sum(value), count(value), max(value) | |
from table_one SAMPLE 0.01 | |
WHERE banner_id = 42 | |
group by banner_id format Null; | |
0 rows in set. Elapsed: 0.008 sec. Processed 696.32 thousand rows, 9.75 MB (92.49 million rows/s., 1.29 GB/s.) | |
select banner_id, toStartOfHour(toDateTime(timestamp)) hr, sum(value), count(value), max(value) | |
from table_one | |
group by banner_id, hr format Null; | |
0 rows in set. Elapsed: 36.660 sec. Processed 10.00 billion rows, 140.00 GB (272.77 million rows/s., 3.82 GB/s.) | |
select banner_id, toStartOfHour(toDateTime(timestamp)) hr, sum(value), count(value), max(value) | |
from table_one SAMPLE 0.01 | |
group by banner_id, hr format Null; | |
0 rows in set. Elapsed: 3.741 sec. Processed 452.67 million rows, 9.96 GB (121.00 million rows/s., 2.66 GB/s.) | |
select count() | |
from table_one | |
where value = 666 format Null; | |
1 rows in set. Elapsed: 6.056 sec. Processed 10.00 billion rows, 40.00 GB (1.65 billion rows/s., 6.61 GB/s.) | |
select count() | |
from table_one SAMPLE 0.01 | |
where value = 666 format Null; | |
1 rows in set. Elapsed: 1.214 sec. Processed 452.67 million rows, 5.43 GB (372.88 million rows/s., 4.47 GB/s.) | |
--------- | |
CREATE TABLE table_one | |
( timestamp UInt64, | |
transaction_id UInt64, | |
banner_id UInt16, | |
value UInt32 | |
) | |
ENGINE = MergeTree() | |
PARTITION BY toYYYYMMDD(toDateTime(timestamp)) | |
ORDER BY (banner_id, timestamp, cityHash64(transaction_id)) | |
SAMPLE BY cityHash64(transaction_id) | |
SETTINGS index_granularity = 8192 | |
insert into table_one select 1602809234+intDiv(number,100000), number, number%991, toUInt32(rand()) | |
from numbers(10000000000); | |
select banner_id, sum(value), count(value), max(value) | |
from table_one | |
group by banner_id format Null; | |
0 rows in set. Elapsed: 11.196 sec. Processed 10.00 billion rows, 60.00 GB (893.15 million rows/s., 5.36 GB/s.) | |
select banner_id, sum(value), count(value), max(value) | |
from table_one SAMPLE 0.01 | |
group by banner_id format Null; | |
0 rows in set. Elapsed: 24.378 sec. Processed 10.00 billion rows, 140.00 GB (410.21 million rows/s., 5.74 GB/s.) | |
select banner_id, sum(value), count(value), max(value) | |
from table_one | |
WHERE banner_id = 42 | |
group by banner_id format Null; | |
0 rows in set. Elapsed: 0.022 sec. Processed 10.27 million rows, 61.64 MB (459.28 million rows/s., 2.76 GB/s.) | |
select banner_id, sum(value), count(value), max(value) | |
from table_one SAMPLE 0.01 | |
WHERE banner_id = 42 | |
group by banner_id format Null; | |
0 rows in set. Elapsed: 0.037 sec. Processed 10.27 million rows, 143.82 MB (275.16 million rows/s., 3.85 GB/s.) | |
select banner_id, toStartOfHour(toDateTime(timestamp)) hr, sum(value), count(value), max(value) | |
from table_one | |
group by banner_id, hr format Null; | |
0 rows in set. Elapsed: 21.663 sec. Processed 10.00 billion rows, 140.00 GB (461.62 million rows/s., 6.46 GB/s.) | |
select banner_id, toStartOfHour(toDateTime(timestamp)) hr, sum(value), count(value), max(value) | |
from table_one SAMPLE 0.01 | |
group by banner_id, hr format Null; | |
0 rows in set. Elapsed: 26.697 sec. Processed 10.00 billion rows, 220.00 GB (374.57 million rows/s., 8.24 GB/s.) | |
select count() | |
from table_one | |
where value = 666 format Null; | |
0 rows in set. Elapsed: 7.679 sec. Processed 10.00 billion rows, 40.00 GB (1.30 billion rows/s., 5.21 GB/s.) | |
select count() | |
from table_one SAMPLE 0.01 | |
where value = 666 format Null; | |
0 rows in set. Elapsed: 21.668 sec. Processed 10.00 billion rows, 120.00 GB (461.51 million rows/s., 5.54 GB/s.) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment