Skip to content

Instantly share code, notes, and snippets.

View den-crane's full-sized avatar
🙀
What's happening?

Denny Crane den-crane

🙀
What's happening?
View GitHub Profile
@den-crane
den-crane / wierd_compression.sql
Created October 30, 2023 21:20
wierd_compression
View wierd_compression.sql
CREATE TABLE t
(
key_zstd1 Int64 CODEC(ZSTD(1)),
key_lz4 Int64 CODEC(LZ4),
key_t64_zstd Int64 CODEC(T64,ZSTD(1)),
key_int8 Int8 CODEC(ZSTD(1)),
r Int32
)
ENGINE = MergeTree
ORDER BY (key_zstd1, r);
@den-crane
den-crane / uniqHLL12_vs_uniqTheta
Created October 25, 2023 21:48
uniqHLL12 vs uniqTheta
View uniqHLL12_vs_uniqTheta
create table x (a Int64, b Int64, c Int64,
u AggregateFunction(uniqHLL12, String),
t AggregateFunction(uniqTheta, String))
Engine=MergeTree order by (a,b,c);
insert into x select number%11111 a, number%44 b, number%3 c, uniqHLL12State(number::String), uniqThetaState(number::String)
from numbers(1e7) group by a,b,c;
optimize table x final;
@den-crane
den-crane / ttl_merge_of_wide_rows_mem_usage.md
Last active September 6, 2023 22:05
ttl_merge_of_wide_rows_mem_usage
View ttl_merge_of_wide_rows_mem_usage.md
CREATE TABLE wide_rows2(`A` Int64, `D` Date, `S` String)
ENGINE = MergeTree PARTITION BY toYYYYMM(D) ORDER BY A
settings merge_with_ttl_timeout=300, materialize_ttl_recalculate_only = 1;

insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%999), range(2000)) from numbers(1e5);
insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%899), range(2000)) from numbers(1e5);
insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%799), range(2000)) from numbers(1e5);
insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%699), range(2000)) from numbers(1e5);
set materialize_ttl_after_modify=0;
@den-crane
den-crane / ch_vs_sr.md
Last active July 20, 2023 21:54
ch vs sr
View ch_vs_sr.md

CH 1-node

SELECT
    count(),
    min(paramkey),
    max(paramkey),
    min(ind),
    max(ind)
FROM data_table
┌───count()─┬────────min(paramkey)─┬───────max(paramkey)─┬─min(ind)─┬──max(ind)─┐
View gist:976d8213b15c8ef5b3f48c052402a9d1
https://github.com/ClickHouse/ClickHouse/issues/47092#issuecomment-1485052499
23.3.8.21
<default>
<access_management>1</access_management>
</default>
create user foo;
grant all on *.* to foo;
@den-crane
den-crane / MinMax.md
Last active June 6, 2023 18:38
Using MinMax skip index to improve partition reading
View MinMax.md
create table test(tenant_id UInt32,  ts1 DateTime, timestamp DateTime)
engine=MergeTree
partition by toDate(ts1)
PRIMARY KEY (tenant_id, toStartOfHour(timestamp))
ORDER BY (tenant_id, toStartOfHour(timestamp),  timestamp)
as select 1, now() - number/10 x, x from numbers(1e8);

insert into test select number%1000, now() - number/10 x, 0 from numbers(1e8);
@den-crane
den-crane / ingest.md
Created April 27, 2023 12:17
ingest data from file using clickhouse-http-java-client
View ingest.md

Test data

clickhouse-client -q "select number A, now() B, 'x' C from numbers(1e6) format Parquet" > test.parquet

Maven

    <dependencies>
@den-crane
den-crane / total_over_group.md
Created March 17, 2023 18:20
Average / Total sum over group
View total_over_group.md
create table A (pk_col Int64, col1 Float64, col4 Float64, day Date) Engine=Memory
as select * from values( 
            (1, 3, 4, today()), (1, 1, 4, today()), (1, 2, 0, today()-1), 
            (2, 3, 4, today()), (2, 3, 0, today()-1) );

Window function

@den-crane
den-crane / Buffer_Distributed_Replicated.md
Last active February 28, 2023 20:00
Buffer -> Distributed -> Replicated
View Buffer_Distributed_Replicated.md

Buffer -> Replicated

create database test on cluster '{cluster}' Engine=Ordinary;

create table test.test on cluster '{cluster}' (A Int64) 
Engine = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{database}/{table}', '{replica}') order by A;

create table test.test_b on cluster  '{cluster}' as test.test
Engine = Buffer(test, test, 1, 1, 2, 10, 100, 10000000, 100000000);
@den-crane
den-crane / primary_key_and_memory_allocation.sql
Created February 14, 2023 19:01
primary key is loaded on Clickhouse start
View primary_key_and_memory_allocation.sql
create table idx( A Int64, D Date) Engine=MergeTree order by A partition by D;
insert into idx select number, today() from numbers(1e8);
insert into idx select number, today()-1 from numbers(1e8);
select sum(primary_key_bytes_in_memory), sum(primary_key_bytes_in_memory_allocated)
from system.parts where table = 'idx' and active;
┌─sum(primary_key_bytes_in_memory)─┬─sum(primary_key_bytes_in_memory_allocated)─┐
│ 195360 │ 264192 │
└──────────────────────────────────┴────────────────────────────────────────────┘