Skip to content

Instantly share code, notes, and snippets.

View den-crane's full-sized avatar
🙀
What's happening?

Denny Crane den-crane

🙀
What's happening?
View GitHub Profile
@den-crane
den-crane / summingmergetree-arrays-nested
Last active November 30, 2018 16:39
summingmergetree-arrays-nested
truncate table logs;
truncate table agg_table;
CREATE TABLE logs(
date Date,
ts DateTime,
groupId Int8,
subGroupId Int8) ENGINE = MergeTree() PARTITION BY date ORDER BY (groupId);
@den-crane
den-crane / AggregatingMergeTree-event-enrichment
Last active September 7, 2023 17:32
AggregatingMergeTree-event-enrichment
CREATE TABLE states_raw(
d date,
uid UInt64,
first_name String,
last_name String,
modification_timestamp_mcs DateTime64(3) default now64(3)
) ENGINE = Null;
CREATE TABLE final_states_by_month(
d date,
@den-crane
den-crane / clickhouse-locate-by-bucket
Last active December 14, 2018 18:21
locate-by-bucket
# 103 buckets (10007 buckets do not work).
create table foo_h(user_id String, date Date, val UInt64, h UInt16) engine=MergeTree order by (h, date);
insert into foo_h select toString(cityHash64(toString(number))) user_id, '2018-12-14', number, cityHash64(toString(number)) % 103 from numbers(50000000);
insert into foo_h select toString(cityHash64(toString(number))) user_id, '2018-12-13', number, cityHash64(toString(number)) % 103 from numbers(50000000);
insert into foo_h select toString(cityHash64(toString(number))) user_id, '2018-12-12', number, cityHash64(toString(number)) % 103 from numbers(5);
select count() from foo_h prewhere user_id = '10408321403207385874';
1 rows in set. Elapsed: 0.695 sec. Processed 100.00 million rows,
@den-crane
den-crane / AggregatingMergeTree-groupArrayState
Last active July 25, 2023 16:15
AggregatingMergeTree-groupArrayState
----------------------- SimpleAggregateFunction + groupArrayArray ----------------------------
drop table if exists states_raw;
drop table if exists final_states_by_month;
drop table if exists final_states_by_month_mv;
CREATE TABLE states_raw(d date, uid UInt64, s String) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state SimpleAggregateFunction(groupArrayArray, Array(String)))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
drop table fact;
drop table animals;
drop table colors;
create table fact(id Int64, animal_key Int64, color_key Int64) Engine = MergeTree order by tuple();
insert into fact values (1,1,1),(2,2,2);
create table animals(animal_key UInt64, animal_name String) Engine = MergeTree order by tuple();
insert into animals values (0, 'unknown');
@den-crane
den-crane / metrics_metrics1
Last active March 21, 2022 22:19
order by (metric, time) VS order by (time, metric)
create table metrics (metric UInt32, val Float64, time DateTime) engine = MergeTree()
partition by toYYYYMMDD(time)
order by (metric, time)
insert into metrics select number%4999, rand(), (toDateTime(today())+number%2880*30) from numbers(100000000);
insert into metrics select number%4999, rand(), (toDateTime(today()-1)+number%2880*30) from numbers(100000000);
insert into metrics select number%4999, rand(), (toDateTime(today()-2)+number%2880*30) from numbers(100000000);
insert into metrics select number%4999, rand(), (toDateTime(today()-3)+number%2880*30) from numbers(100000000);
insert into metrics select number%4999, rand(), (toDateTime(today()-4)+number%2880*30) from numbers(100000000);
@den-crane
den-crane / several_MV_one_internal_store
Created March 10, 2019 13:48
MV union all workaround
create table tableA (A String) Engine=MergeTree order by tuple();
create table tableB (B String) Engine=MergeTree order by tuple();
create table tableC (C String) Engine=Null;
create table storeABC(ABC String) Engine=MergeTree order by tuple();
create materialized view MVA to storeABC as select A ABC from tableA;
create materialized view MVB to storeABC as select B ABC from tableB;
create materialized view MVC to storeABC as select C ABC from tableC;
insert into tableA values('A');
insert into tableB values('B');
insert into tableC values('C');
@den-crane
den-crane / CH-skip-index
Last active March 11, 2019 15:37
CH-skip-index
create table BX(I Int64, S String) Engine=MergeTree order by I;
insert into BX select number, toString(rand()) from numbers(10000000);
ALTER TABLE BX ADD INDEX idx0 S TYPE minmax GRANULARITY 10;
optimize table BX final;
select count() from BX prewhere S = '666';
0 rows in set. Processed 10.00 million rows,
select count() from BX prewhere S = 'A';
0 rows in set. Elapsed: 0.001 sec.
@den-crane
den-crane / sample_offset
Created March 14, 2019 15:17
sample 0.33 offset 0.33 example
CREATE TABLE fff ( tx Int64) ENGINE = MergeTree() ORDER BY (intHash32(tx)) SAMPLE BY intHash32(tx)
insert into fff select number from numbers(10);
select groupArray(tx) from fff sample 0.33 offset 0
[9,6,3,1]
select groupArray(tx) from fff sample 0.33 offset 0.33
[8,2,5]
select groupArray(tx) from fff sample 0.33 offset 0.66
@den-crane
den-crane / CH_percentage_from_total
Created April 10, 2019 18:30
CH percentage from total using groupArray
SELECT grp, ga.1 key, ga.2 value, vsum, value / vsum*100 percent
FROM (SELECT grp, groupArray(tuple (key,value)) ga, SUM(value) vsum
FROM (SELECT 1 grp,'k1' key,33 value UNION ALL SELECT 1, 'k2', 11 UNION ALL SELECT 2,'k1', 0 UNION ALL SELECT 2,'k2',1)
GROUP BY grp) Array JOIN ga