Skip to content

Instantly share code, notes, and snippets.

@portnov
Created September 14, 2023 20:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save portnov/0f8ec7c651316b5d5d1deffd2f14a2bf to your computer and use it in GitHub Desktop.
Save portnov/0f8ec7c651316b5d5d1deffd2f14a2bf to your computer and use it in GitHub Desktop.
operations EAV
-- portnov@localhost:1521/TXPDB
-- 14 сент. 2023 г. 15:40:23
create table tst_operation (
id int8 generated always as identity primary key,
date datetime not null,
amt numeric not null
);
create index tst_operation_date on tst_operation (date, id);
create table tst_operation_tag (
operation_id int8 not null references tst_operation (id),
tag_name text not null,
tag_value text not null,
primary key (tag_value, tag_name, operation_id)
);
truncate table tst_operation_tag;
truncate table tst_operation cascade;
-- data preparation; takes 3 minutes
with p as (
select array['TTK', 'MTS', 'BEELINE'] providers
),
m as (
select array['MAGNIT', 'PYATEROCHKA', 'LENTA', 'GLORIA', 'PEREKRESTOK', 'ASHAN', 'AMAZON'] merchants
),
new_operations (id) as (
insert into tst_operation (date, amt)
select day, round(random()*1000, 2)
from generate_series(1, 10000) ops (seq) -- 10k operations per day
cross join generate_series('2023-01-01', '2023-12-31', interval '1 day') days (day)
returning tst_operation.id
)
insert into tst_operation_tag (operation_id, tag_name, tag_value)
select id, 'provider', providers[floor(random()*array_length(providers,1))::int + 1]
from new_operations, p
union all
select id, 'merchant', merchants[floor(random()*array_length(merchants,1))::int + 1]
from new_operations, m
union all
select id, 'customer', 'customer_' || (floor(random()*1000)::int + 1) -- 1000 customers
from new_operations;
select count(1) from tst_operation;
-- 3 650 000
select count(1) from tst_operation_tag;
-- 10 950 000
create statistics st_operation_tag (mcv, dependencies) on tag_name, tag_value from tst_operation_tag;
analyze tst_operation;
analyze tst_operation_tag;
explain analyze
select count(*), sum(op.amt)
from tst_operation op
inner join tst_operation_tag t1 on t1.operation_id = op.id
inner join tst_operation_tag t2 on t2.operation_id = op.id
inner join tst_operation_tag t3 on t3.operation_id = op.id
where t1.tag_name = 'provider'
and t1.tag_value = 'TTK'
and t2.tag_name = 'merchant'
and t2.tag_value = 'PEREKRESTOK'
and t3.tag_name = 'customer'
and t3.tag_value = 'customer_99'
and op.date between '2023-03-01' and '2023-05-31';
/*
count|sum |
-----+--------+
41|22668.53|
*/
/*
QUERY PLAN |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
Aggregate (cost=2206.37..2206.40 rows=1 width=40) (actual time=21.681..21.683 rows=1 loops=1) |
-> Nested Loop (cost=2.11..2206.30 rows=15 width=6) (actual time=3.197..21.663 rows=41 loops=1) |
-> Nested Loop (cost=1.55..2173.97 rows=45 width=30) (actual time=3.170..20.956 rows=115 loops=1) |
-> Nested Loop (cost=1.12..2080.46 rows=174 width=16) (actual time=0.059..19.021 rows=494 loops=1) |
-> Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t3 (cost=0.56..62.72 rows=1248 width=8) (actual time=0.028..0.809 rows=3569 loops=1) |
Index Cond: ((tag_value = 'customer_99'::text) AND (tag_name = 'customer'::text)) |
Heap Fetches: 0 |
-> Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t2 (cost=0.56..1.62 rows=1 width=8) (actual time=0.005..0.005 rows=0 loops=3569) |
Index Cond: ((tag_value = 'PEREKRESTOK'::text) AND (tag_name = 'merchant'::text) AND (operation_id = t3.operation_id)) |
Heap Fetches: 0 |
-> Index Scan using tst_operation_pkey on tst_operation op (cost=0.43..0.54 rows=1 width=14) (actual time=0.004..0.004 rows=0 loops=494) |
Index Cond: (id = t2.operation_id) |
Filter: (((date)::timestamp without time zone >= '2023-03-01 00:00:00'::timestamp without time zone) AND ((date)::timestamp without time zone <= '2023-05-31 00:00:00'::timestamp without time zone))|
Rows Removed by Filter: 1 |
-> Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t1 (cost=0.56..0.72 rows=1 width=8) (actual time=0.006..0.006 rows=0 loops=115) |
Index Cond: ((tag_value = 'TTK'::text) AND (tag_name = 'provider'::text) AND (operation_id = op.id)) |
Heap Fetches: 0 |
Planning Time: 0.695 ms |
Execution Time: 21.738 ms |
*/
explain analyze
select count(*), sum(op.amt)
from tst_operation op
inner join tst_operation_tag t1 on t1.operation_id = op.id
where t1.tag_name = 'provider'
and t1.tag_value = 'TTK'
and op.date between '2023-03-01' and '2023-05-31';
/*
count |sum |
------+------------+
306115|152849546.80|
*/
/*
QUERY PLAN |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
Finalize Aggregate (cost=98515.57..98515.60 rows=1 width=32) (actual time=339.046..351.796 rows=1 loops=1) |
-> Gather (cost=98515.32..98515.55 rows=2 width=32) (actual time=337.424..351.770 rows=3 loops=1) |
Workers Planned: 2 |
Workers Launched: 2 |
-> Partial Aggregate (cost=97515.32..97515.35 rows=1 width=32) (actual time=334.503..334.505 rows=1 loops=3) |
-> Parallel Hash Join (cost=52292.80..97206.62 rows=123481 width=6) (actual time=242.085..326.573 rows=102038 loops=3) |
Hash Cond: (t1.operation_id = op.id) |
-> Parallel Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t1 (cost=0.56..37926.67 rows=490175 width=8) (actual time=0.045..69.729 rows=405323 loops=3) |
Index Cond: ((tag_value = 'TTK'::text) AND (tag_name = 'provider'::text)) |
Heap Fetches: 0 |
-> Parallel Hash (cost=37969.97..37969.97 rows=383116 width=14) (actual time=117.194..117.194 rows=306667 loops=3) |
Buckets: 262144 Batches: 8 Memory Usage: 7488kB |
-> Parallel Index Scan using tst_operation_date on tst_operation op (cost=0.43..37969.97 rows=383116 width=14) (actual time=0.040..67.760 rows=306667 loops=3) |
Index Cond: (((date)::timestamp without time zone >= '2023-03-01 00:00:00'::timestamp without time zone) AND ((date)::timestamp without time zone <= '2023-05-31 00:00:00'::timestamp without time zone))|
Planning Time: 0.212 ms |
Execution Time: 351.835 ms |
*/
explain analyze
select count(*), sum(op.amt)
from tst_operation op
inner join tst_operation_tag t1 on t1.operation_id = op.id
where t1.tag_name = 'merchant'
and t1.tag_value = 'MAGNIT'
and op.date between '2023-03-01' and '2023-05-31';
/*
count |sum |
------+-----------+
130743|65147241.83|
*/
/*
QUERY PLAN |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
Finalize Aggregate (cost=63371.18..63371.21 rows=1 width=40) (actual time=219.202..230.805 rows=1 loops=1) |
-> Gather (cost=63370.93..63371.16 rows=2 width=40) (actual time=217.915..230.794 rows=3 loops=1) |
Workers Planned: 2 |
Workers Launched: 2 |
-> Partial Aggregate (cost=62370.93..62370.96 rows=1 width=40) (actual time=214.628..214.630 rows=1 loops=3) |
-> Parallel Hash Join (cost=23130.56..62105.78 rows=53030 width=6) (actual time=85.931..210.434 rows=43581 loops=3) |
Hash Cond: (op.id = t1.operation_id) |
-> Parallel Index Scan using tst_operation_date on tst_operation op (cost=0.43..37969.97 rows=383116 width=14) (actual time=0.042..51.422 rows=306667 loops=3) |
Index Cond: (((date)::timestamp without time zone >= '2023-03-01 00:00:00'::timestamp without time zone) AND ((date)::timestamp without time zone <= '2023-05-31 00:00:00'::timestamp without time zone))|
-> Parallel Hash (cost=16288.56..16288.56 rows=210510 width=8) (actual time=84.675..84.676 rows=173468 loops=3) |
Buckets: 524288 Batches: 1 Memory Usage: 24480kB |
-> Parallel Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t1 (cost=0.56..16288.56 rows=210510 width=8) (actual time=0.041..45.999 rows=173468 loops=3) |
Index Cond: ((tag_value = 'MAGNIT'::text) AND (tag_name = 'merchant'::text)) |
Heap Fetches: 0 |
Planning Time: 0.187 ms |
Execution Time: 230.846 ms |
*/
explain analyze
select count(*), sum(op.amt)
from tst_operation op
inner join tst_operation_tag t1 on t1.operation_id = op.id
where t1.tag_name = 'customer'
and t1.tag_value = 'customer_99'
and op.date between '2023-03-01' and '2023-05-31';
/*
count|sum |
-----+---------+
898|431805.31|
*/
/*
QUERY PLAN |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
Aggregate (cost=3092.79..3092.82 rows=1 width=40) (actual time=11.638..11.639 rows=1 loops=1) |
-> Nested Loop (cost=0.99..3091.22 rows=314 width=6) (actual time=1.837..11.514 rows=898 loops=1) |
-> Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t1 (cost=0.56..62.72 rows=1248 width=8) (actual time=0.022..0.739 rows=3569 loops=1) |
Index Cond: ((tag_value = 'customer_99'::text) AND (tag_name = 'customer'::text)) |
Heap Fetches: 0 |
-> Index Scan using tst_operation_pkey on tst_operation op (cost=0.43..2.43 rows=1 width=14) (actual time=0.003..0.003 rows=0 loops=3569) |
Index Cond: (id = t1.operation_id) |
Filter: (((date)::timestamp without time zone >= '2023-03-01 00:00:00'::timestamp without time zone) AND ((date)::timestamp without time zone <= '2023-05-31 00:00:00'::timestamp without time zone))|
Rows Removed by Filter: 1 |
Planning Time: 0.189 ms |
Execution Time: 11.673 ms |
*/
explain analyze
select count(*), sum(op.amt)
from tst_operation op
inner join tst_operation_tag t1 on t1.operation_id = op.id
where t1.tag_value = 'customer_99'
and op.date between '2023-03-01' and '2023-05-31';
/*
count|sum |
-----+---------+
898|431805.31|
*/
/*
QUERY PLAN |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
Finalize Aggregate (cost=8268.87..8268.90 rows=1 width=40) (actual time=15.829..22.043 rows=1 loops=1) |
-> Gather (cost=8268.62..8268.85 rows=2 width=40) (actual time=15.683..22.030 rows=3 loops=1) |
Workers Planned: 2 |
Workers Launched: 2 |
-> Partial Aggregate (cost=7268.62..7268.65 rows=1 width=40) (actual time=12.485..12.487 rows=1 loops=3) |
-> Nested Loop (cost=66.25..7266.68 rows=387 width=6) (actual time=1.765..12.405 rows=299 loops=3) |
-> Parallel Bitmap Heap Scan on tst_operation_tag t1 (cost=65.82..3767.94 rows=1535 width=8) (actual time=0.488..4.095 rows=1190 loops=3) |
Recheck Cond: (tag_value = 'customer_99'::text) |
Heap Blocks: exact=2227 |
-> Bitmap Index Scan on tst_operation_tag_pkey (cost=0.00..64.90 rows=3683 width=0) (actual time=0.834..0.834 rows=3569 loops=1) |
Index Cond: (tag_value = 'customer_99'::text) |
-> Index Scan using tst_operation_pkey on tst_operation op (cost=0.43..2.28 rows=1 width=14) (actual time=0.007..0.007 rows=0 loops=3569) |
Index Cond: (id = t1.operation_id) |
Filter: (((date)::timestamp without time zone >= '2023-03-01 00:00:00'::timestamp without time zone) AND ((date)::timestamp without time zone <= '2023-05-31 00:00:00'::timestamp without time zone))|
Rows Removed by Filter: 1 |
Planning Time: 0.190 ms |
Execution Time: 22.095 ms |
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment