Skip to content

Instantly share code, notes, and snippets.

@den-crane
Last active January 10, 2023 15:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save den-crane/00f8e7f8087344512120eec400d086db to your computer and use it in GitHub Desktop.
Save den-crane/00f8e7f8087344512120eec400d086db to your computer and use it in GitHub Desktop.
How to populate some column in AggregatingMergeTree

How to populate some column in AggregatingMergeTree if you use AggregateFunctions for all measurements? You can use simple insert and omit columns which should not be populated. AggregateFunction States do everything else magically

CREATE TABLE values_minute
(
    id LowCardinality(String),
    ts DateTime,
    count_state AggregateFunction(count),
    max_state AggregateFunction(max, Float64),
    min_state AggregateFunction(min, Float64),
    avg_state AggregateFunction(avg, Float64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(ts)
ORDER BY (id, ts);

insert into values_minute(id, ts, count_state, max_state, min_state, avg_state)
select 1, '2022-01-01 00:00:00', countState(), maxState(x),  minState(x), avgState(x) 
from (select toFloat64(number+3) x from numbers(10));

select id, ts, countMerge(count_state) cnt, maxMerge(max_state) max_value, 
       minMerge(min_state) min_value, avgMerge(avg_state) avg_value 
from values_minute 
group by id, ts;

┌─id─┬──────────────────ts─┬─cnt─┬─max_value─┬─min_value─┬─avg_value─┐
│ 12022-01-01 00:00:00101237.5 │
└────┴─────────────────────┴─────┴───────────┴───────────┴───────────┘

let's add new columns

alter table values_minute 
    add column `max_new_state` AggregateFunction(max, Float64),
    add column `min_new_state` AggregateFunction(min, Float64), 
    add column `avg_new_state` AggregateFunction(avg, Float64);

and populate them using simple insert

insert into values_minute(id, ts, max_new_state, min_new_state, avg_new_state)
select 1, '2022-01-01 00:00:00', maxState(x),  minState(x), avgState(x) 
from (select toFloat64(number+100) x from numbers(10));

select id, ts, countMerge(count_state) cnt, maxMerge(max_state) max_value, 
       minMerge(min_state) min_value, avgMerge(avg_state) avg_value, 
       maxMerge(max_new_state) max_new, minMerge(min_new_state) min_new,  
       avgMerge(avg_new_state) avg_new 
from values_minute 
group by id, ts;
┌─id─┬──────────────────ts─┬─cnt─┬─max_value─┬─min_value─┬─avg_value─┬─max_new─┬─min_new─┬─avg_new─┐
│ 12022-01-01 00:00:00101237.5109100104.5 │
└────┴─────────────────────┴─────┴───────────┴───────────┴───────────┴─────────┴─────────┴─────────┘

Instead of insert into values_minute select you can use Null engine + MatView.

CREATE TABLE dummy
(
    id LowCardinality(String),
    ts DateTime,
    new Float64
)
ENGINE = Null;

CREATE MATERIALIZED VIEW temp_mv TO values_minute AS
SELECT
    id, ts, 
    maxState(new) max_new_state, 
    minState(new) min_new_state, 
    avgState(new) avg_new_state
FROM dummy
GROUP BY id, ts;

insert into dummy
select 1, '2022-01-01 00:00:00', number+110
from numbers(10);

select id, ts, countMerge(count_state) cnt, maxMerge(max_state) max_value, 
       minMerge(min_state) min_value, avgMerge(avg_state) avg_value, 
       maxMerge(max_new_state) max_new, minMerge(min_new_state) min_new,  
       avgMerge(avg_new_state) avg_new 
from values_minute 
group by id, ts;
┌─id─┬──────────────────ts─┬─cnt─┬─max_value─┬─min_value─┬─avg_value─┬─max_new─┬─min_new─┬─avg_new─┐
│ 12022-01-01 00:00:00101237.5119100109.5 │
└────┴─────────────────────┴─────┴───────────┴───────────┴───────────┴─────────┴─────────┴─────────┘
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment