Skip to content

Instantly share code, notes, and snippets.

@den-crane
Last active May 8, 2024 10:57
Show Gist options
  • Star 12 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save den-crane/d03524eadbbce0bafa528101afa8f794 to your computer and use it in GitHub Desktop.
Save den-crane/d03524eadbbce0bafa528101afa8f794 to your computer and use it in GitHub Desktop.
AggregatingMergeTree-event-enrichment
CREATE TABLE states_raw(
d date,
uid UInt64,
first_name String,
last_name String,
modification_timestamp_mcs DateTime64(3) default now64(3)
) ENGINE = Null;
CREATE TABLE final_states_by_month(
d date,
uid UInt64,
final_first_name AggregateFunction(argMax, String, DateTime64(3)),
final_last_name AggregateFunction(argMax, String, DateTime64(3)))
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(d)
ORDER BY (uid, d);
CREATE MATERIALIZED VIEW final_states_by_month_mv TO final_states_by_month AS
SELECT
d, uid,
argMaxState(first_name, if(first_name<>'', modification_timestamp_mcs, toDateTime64(0,3))) AS final_first_name,
argMaxState(last_name, if(last_name<>'', modification_timestamp_mcs, toDateTime64(0,3))) AS final_last_name
FROM states_raw
GROUP BY d, uid;
insert into states_raw(d,uid,first_name) values (today(), 1, 'Tom');
insert into states_raw(d,uid,last_name) values (today(), 1, 'Jones');
insert into states_raw(d,uid,first_name,last_name) values (today(), 2, 'XXX', '');
insert into states_raw(d,uid,first_name,last_name) values (today(), 2, 'YYY', 'YYY');
select uid, argMaxMerge(final_first_name) first_name, argMaxMerge(final_last_name) last_name
from final_states_by_month group by uid
┌─uid─┬─first_name─┬─last_name─┐
│ 2 │ YYY │ YYY │
│ 1 │ Tom │ Jones │
└─────┴────────────┴───────────┘
optimize table final_states_by_month final;
select uid, finalizeAggregation(final_first_name) first_name, finalizeAggregation(final_last_name) last_name
from final_states_by_month
┌─uid─┬─first_name─┬─last_name─┐
│ 1 │ Tom │ Jones │
│ 2 │ YYY │ YYY │
└─────┴────────────┴───────────┘
@den-crane
Copy link
Author

@den-crane Shouldn't columns on line 16 and line 24 match? ORDER BY (uid) vs GROUP BY d, uid

they should, my bad

@den-crane
Copy link
Author

@karthikvt26
Copy link

Thank you @den-crane. I wonder what would happen if we have billion's of records and we try to filter by say first_name. Kindly correct me if I am wrong, I believe it would have to do read all the rows to finally return.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment