Skip to content

Instantly share code, notes, and snippets.

View den-crane's full-sized avatar
🙀
What's happening?

Denny Crane den-crane

🙀
What's happening?
View GitHub Profile
@den-crane
den-crane / gist:6eff375752a236a456e1b3dc2ca7db62
Last active September 30, 2023 08:43
Clickhouse example AggregatingMergeTree, (max, min, avg ) State / Merge
DROP TABLE IF EXISTS requests;
CREATE TABLE requests (
request_date Date,
request_time DateTime,
response_time Int,
request_uri String)
ENGINE = MergeTree(request_date, (request_time, request_uri), 8192);
@den-crane
den-crane / AggregatingMergeTree-event-enrichment
Last active September 7, 2023 17:32
AggregatingMergeTree-event-enrichment
CREATE TABLE states_raw(
d date,
uid UInt64,
first_name String,
last_name String,
modification_timestamp_mcs DateTime64(3) default now64(3)
) ENGINE = Null;
CREATE TABLE final_states_by_month(
d date,
@den-crane
den-crane / ttl_merge_of_wide_rows_mem_usage.md
Last active September 6, 2023 22:05
ttl_merge_of_wide_rows_mem_usage
CREATE TABLE wide_rows2(`A` Int64, `D` Date, `S` String)
ENGINE = MergeTree PARTITION BY toYYYYMM(D) ORDER BY A
settings merge_with_ttl_timeout=300, materialize_ttl_recalculate_only = 1;

insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%999), range(2000)) from numbers(1e5);
insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%899), range(2000)) from numbers(1e5);
insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%799), range(2000)) from numbers(1e5);
insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%699), range(2000)) from numbers(1e5);
set materialize_ttl_after_modify=0;
@den-crane
den-crane / AggregatingMergeTree-groupArrayState
Last active July 25, 2023 16:15
AggregatingMergeTree-groupArrayState
----------------------- SimpleAggregateFunction + groupArrayArray ----------------------------
drop table if exists states_raw;
drop table if exists final_states_by_month;
drop table if exists final_states_by_month_mv;
CREATE TABLE states_raw(d date, uid UInt64, s String) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state SimpleAggregateFunction(groupArrayArray, Array(String)))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
@den-crane
den-crane / ch_vs_sr.md
Last active July 20, 2023 21:54
ch vs sr

CH 1-node

SELECT
    count(),
    min(paramkey),
    max(paramkey),
    min(ind),
    max(ind)
FROM data_table
┌───count()─┬────────min(paramkey)─┬───────max(paramkey)─┬─min(ind)─┬──max(ind)─┐
https://github.com/ClickHouse/ClickHouse/issues/47092#issuecomment-1485052499
23.3.8.21
<default>
<access_management>1</access_management>
</default>
create user foo;
grant all on *.* to foo;
@den-crane
den-crane / MinMax.md
Last active June 6, 2023 18:38
Using MinMax skip index to improve partition reading
create table test(tenant_id UInt32,  ts1 DateTime, timestamp DateTime)
engine=MergeTree
partition by toDate(ts1)
PRIMARY KEY (tenant_id, toStartOfHour(timestamp))
ORDER BY (tenant_id, toStartOfHour(timestamp),  timestamp)
as select 1, now() - number/10 x, x from numbers(1e8);

insert into test select number%1000, now() - number/10 x, 0 from numbers(1e8);
@den-crane
den-crane / ingest.md
Created April 27, 2023 12:17
ingest data from file using clickhouse-http-java-client

Test data

clickhouse-client -q "select number A, now() B, 'x' C from numbers(1e6) format Parquet" > test.parquet

Maven

    <dependencies>
@den-crane
den-crane / djoin.sql
Last active April 23, 2023 06:19
djoin
-- in different versions of CH it works differently, and does not work at all with circle replication
drop table t1 on cluster segmented;
drop table t2 on cluster segmented;
create table t1 on cluster segmented (A Int64) Engine=MergeTree order by tuple();
create table t2 on cluster segmented (A Int64) Engine=MergeTree order by tuple();
create table t1d on cluster segmented as t1 Engine=Distributed(segmented, currentDatabase(), t1, A);
create table t2d on cluster segmented as t2 Engine=Distributed(segmented, currentDatabase(), t2, A);
@den-crane
den-crane / total_over_group.md
Created March 17, 2023 18:20
Average / Total sum over group
create table A (pk_col Int64, col1 Float64, col4 Float64, day Date) Engine=Memory
as select * from values( 
            (1, 3, 4, today()), (1, 1, 4, today()), (1, 2, 0, today()-1), 
            (2, 3, 4, today()), (2, 3, 0, today()-1) );

Window function