Skip to content

Instantly share code, notes, and snippets.

@den-crane
Last active July 25, 2023 16:15
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save den-crane/048ce66bc59f5577d56e42d76934682e to your computer and use it in GitHub Desktop.
Save den-crane/048ce66bc59f5577d56e42d76934682e to your computer and use it in GitHub Desktop.
AggregatingMergeTree-groupArrayState
----------------------- SimpleAggregateFunction + groupArrayArray ----------------------------
drop table if exists states_raw;
drop table if exists final_states_by_month;
drop table if exists final_states_by_month_mv;
CREATE TABLE states_raw(d date, uid UInt64, s String) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state SimpleAggregateFunction(groupArrayArray, Array(String)))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
CREATE MATERIALIZED VIEW final_states_by_month_mv TO final_states_by_month AS
SELECT d, uid, groupArray(s) a_state FROM states_raw GROUP BY d, uid;
insert into states_raw values('2019-01-01', 1, 'x'),('2019-01-02', 1, 'y'), ('2019-01-01', 1, 'z');
select uid, groupArrayArray(a_state) from final_states_by_month group by uid;
┌─uid─┬─groupArrayArray(a_state)─┐
│ 1 │ ['x','z','y'] │
└─────┴──────────────────────────┘
--------------------------------- SimpleAggregateFunction + groupUniqArrayArray ------------------
drop table if exists states_raw;
drop table if exists final_states_by_month;
drop table if exists final_states_by_month_mv;
CREATE TABLE states_raw(d date, uid UInt64, s String) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state SimpleAggregateFunction(groupUniqArrayArray, Array(String)))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
CREATE MATERIALIZED VIEW final_states_by_month_mv TO final_states_by_month AS
SELECT d, uid, groupArray(s) a_state FROM states_raw GROUP BY d, uid;
insert into states_raw values('2019-01-01', 1, 'x'),('2019-01-02', 1, 'y'), ('2019-01-01', 1, 'z');
insert into states_raw values('2019-01-01', 1, 'x');
insert into states_raw values('2019-01-01', 1, 'y');
optimize table final_states_by_month final;
select uid, groupArrayArray(a_state) from final_states_by_month group by uid;
┌─uid─┬─groupArrayArray(a_state)─┐
│ 1 │ ['x','y','z'] │
└─────┴──────────────────────────┘
select * from final_states_by_month
┌──────────d─┬─uid─┬─a_state───────┐
│ 2019-01-01 │ 1 │ ['x','y','z'] │
└────────────┴─────┴───────────────┘
--------------------------------- AggregateFunction + groupArray ------------------
drop table if exists states_raw;
drop table if exists final_states_by_month;
drop table if exists final_states_by_month_mv;
CREATE TABLE states_raw(d date, uid UInt64, s String) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state AggregateFunction(groupArray, String))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
CREATE MATERIALIZED VIEW final_states_by_month_mv TO final_states_by_month AS
SELECT d, uid, groupArrayState(s) a_state FROM states_raw GROUP BY d, uid;
insert into states_raw values('2019-01-01', 1, 'x'),('2019-01-02', 1, 'y'), ('2019-01-01', 1, 'z');
select uid, groupArrayMerge(a_state) from final_states_by_month group by uid;
┌─uid─┬─groupArrayMerge(a_state)─┐
│ 1 │ ['x','z','y'] │
└─────┴──────────────────────────┘
---------------------------------------------------
groupArrayArray
CREATE TABLE states_raw(d date, uid UInt64, s Array(String)) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state AggregateFunction(groupArrayArray, Array(String)))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
CREATE MATERIALIZED VIEW final_states_by_month_mv TO final_states_by_month AS
SELECT d, uid, groupArrayArrayState(s) a_state FROM states_raw GROUP BY d, uid;
insert into states_raw values('2019-01-01', 1, ['x']),('2019-01-02', 2, ['y','1']), ('2019-01-01', 3, ['z']);
insert into states_raw values('2019-01-01', 2, ['x']),('2019-01-02', 1, ['y','1']), ('2019-01-01', 3, ['z']);
select uid, groupArrayArrayMerge(a_state) from final_states_by_month group by uid;
optimize table final_states_by_month final
select * from final_states_by_month;
--------------------------------------------------
# groupUniqArrayArray
drop table states_raw;
drop table final_states_by_month;
drop table final_states_by_month_mv
CREATE TABLE states_raw(d date, uid UInt64, s Array(String)) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state AggregateFunction(groupUniqArrayArray, Array(String)))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
CREATE MATERIALIZED VIEW final_states_by_month_mv TO final_states_by_month AS
SELECT d, uid, groupUniqArrayArrayState(s) a_state FROM states_raw GROUP BY d, uid;
insert into states_raw values('2019-01-01', 1, ['x']),('2019-01-02', 2, ['y','1']), ('2019-01-01', 3, ['z']);
insert into states_raw values('2019-01-01', 2, ['x']),('2019-01-02', 1, ['y','1']), ('2019-01-01', 3, ['z']);
insert into states_raw values('2019-01-01', 3, ['a', 'a']);
insert into states_raw values('2019-01-01', 3, ['a', 'b']);
select uid, groupUniqArrayArrayMerge(a_state) from final_states_by_month group by uid;
┌─uid─┬─groupUniqArrayArrayMerge(a_state)─┐
│ 3 │ ['z','a','b'] │
│ 2 │ ['1','y','x'] │
│ 1 │ ['1','y','x'] │
└─────┴───────────────────────────────────┘
optimize table final_states_by_month final
select * from final_states_by_month;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment