Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save marliotto/3385e5e52a8f4abb276ca026b158cf6b to your computer and use it in GitHub Desktop.
Save marliotto/3385e5e52a8f4abb276ca026b158cf6b to your computer and use it in GitHub Desktop.
AggregatingMergeTree-groupArrayState
drop table if exists states_raw;
drop table if exists final_states_by_month;
drop table if exists final_states_by_month_mv;
CREATE TABLE states_raw(d date, uid UInt64, s String) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state AggregateFunction(groupArray, String))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
CREATE MATERIALIZED VIEW final_states_by_month_mv TO final_states_by_month AS
SELECT d, uid, groupArrayState(s) a_state FROM states_raw GROUP BY d, uid;
insert into states_raw values('2019-01-01', 1, 'x'),('2019-01-02', 1, 'y'), ('2019-01-01', 1, 'z');
select uid, groupArrayMerge(a_state) from final_states_by_month group by uid;
┌─uid─┬─groupArrayMerge(a_state)─┐
│ 1 │ ['x','z','y'] │
└─────┴──────────────────────────┘
---------------------------------------------------
groupArrayArray
CREATE TABLE states_raw(d date, uid UInt64, s Array(String)) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state AggregateFunction(groupArrayArray, Array(String)))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
CREATE MATERIALIZED VIEW final_states_by_month_mv TO final_states_by_month AS
SELECT d, uid, groupArrayArrayState(s) a_state FROM states_raw GROUP BY d, uid;
insert into states_raw values('2019-01-01', 1, ['x']),('2019-01-02', 2, ['y','1']), ('2019-01-01', 3, ['z']);
insert into states_raw values('2019-01-01', 2, ['x']),('2019-01-02', 1, ['y','1']), ('2019-01-01', 3, ['z']);
select uid, groupArrayArrayMerge(a_state) from final_states_by_month group by uid;
optimize table final_states_by_month final
select * from final_states_by_month;
--------------------------------------------------
# groupUniqArrayArray
drop table states_raw;
drop table final_states_by_month;
drop table final_states_by_month_mv
CREATE TABLE states_raw(d date, uid UInt64, s Array(String)) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state AggregateFunction(groupUniqArrayArray, Array(String)))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
CREATE MATERIALIZED VIEW final_states_by_month_mv TO final_states_by_month AS
SELECT d, uid, groupUniqArrayArrayState(s) a_state FROM states_raw GROUP BY d, uid;
insert into states_raw values('2019-01-01', 1, ['x']),('2019-01-02', 2, ['y','1']), ('2019-01-01', 3, ['z']);
insert into states_raw values('2019-01-01', 2, ['x']),('2019-01-02', 1, ['y','1']), ('2019-01-01', 3, ['z']);
insert into states_raw values('2019-01-01', 3, ['a', 'a']);
insert into states_raw values('2019-01-01', 3, ['a', 'b']);
select uid, groupUniqArrayArrayMerge(a_state) from final_states_by_month group by uid;
┌─uid─┬─groupUniqArrayArrayMerge(a_state)─┐
│ 3 │ ['z','a','b'] │
│ 2 │ ['1','y','x'] │
│ 1 │ ['1','y','x'] │
└─────┴───────────────────────────────────┘
optimize table final_states_by_month final
select * from final_states_by_month;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment