Created
September 14, 2023 20:58
-
-
Save portnov/0f8ec7c651316b5d5d1deffd2f14a2bf to your computer and use it in GitHub Desktop.
operations EAV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- portnov@localhost:1521/TXPDB | |
-- 14 сент. 2023 г. 15:40:23 | |
create table tst_operation ( | |
id int8 generated always as identity primary key, | |
date datetime not null, | |
amt numeric not null | |
); | |
create index tst_operation_date on tst_operation (date, id); | |
create table tst_operation_tag ( | |
operation_id int8 not null references tst_operation (id), | |
tag_name text not null, | |
tag_value text not null, | |
primary key (tag_value, tag_name, operation_id) | |
); | |
truncate table tst_operation_tag; | |
truncate table tst_operation cascade; | |
-- data preparation; takes 3 minutes | |
with p as ( | |
select array['TTK', 'MTS', 'BEELINE'] providers | |
), | |
m as ( | |
select array['MAGNIT', 'PYATEROCHKA', 'LENTA', 'GLORIA', 'PEREKRESTOK', 'ASHAN', 'AMAZON'] merchants | |
), | |
new_operations (id) as ( | |
insert into tst_operation (date, amt) | |
select day, round(random()*1000, 2) | |
from generate_series(1, 10000) ops (seq) -- 10k operations per day | |
cross join generate_series('2023-01-01', '2023-12-31', interval '1 day') days (day) | |
returning tst_operation.id | |
) | |
insert into tst_operation_tag (operation_id, tag_name, tag_value) | |
select id, 'provider', providers[floor(random()*array_length(providers,1))::int + 1] | |
from new_operations, p | |
union all | |
select id, 'merchant', merchants[floor(random()*array_length(merchants,1))::int + 1] | |
from new_operations, m | |
union all | |
select id, 'customer', 'customer_' || (floor(random()*1000)::int + 1) -- 1000 customers | |
from new_operations; | |
select count(1) from tst_operation; | |
-- 3 650 000 | |
select count(1) from tst_operation_tag; | |
-- 10 950 000 | |
create statistics st_operation_tag (mcv, dependencies) on tag_name, tag_value from tst_operation_tag; | |
analyze tst_operation; | |
analyze tst_operation_tag; | |
explain analyze | |
select count(*), sum(op.amt) | |
from tst_operation op | |
inner join tst_operation_tag t1 on t1.operation_id = op.id | |
inner join tst_operation_tag t2 on t2.operation_id = op.id | |
inner join tst_operation_tag t3 on t3.operation_id = op.id | |
where t1.tag_name = 'provider' | |
and t1.tag_value = 'TTK' | |
and t2.tag_name = 'merchant' | |
and t2.tag_value = 'PEREKRESTOK' | |
and t3.tag_name = 'customer' | |
and t3.tag_value = 'customer_99' | |
and op.date between '2023-03-01' and '2023-05-31'; | |
/* | |
count|sum | | |
-----+--------+ | |
41|22668.53| | |
*/ | |
/* | |
QUERY PLAN | | |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | |
Aggregate (cost=2206.37..2206.40 rows=1 width=40) (actual time=21.681..21.683 rows=1 loops=1) | | |
-> Nested Loop (cost=2.11..2206.30 rows=15 width=6) (actual time=3.197..21.663 rows=41 loops=1) | | |
-> Nested Loop (cost=1.55..2173.97 rows=45 width=30) (actual time=3.170..20.956 rows=115 loops=1) | | |
-> Nested Loop (cost=1.12..2080.46 rows=174 width=16) (actual time=0.059..19.021 rows=494 loops=1) | | |
-> Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t3 (cost=0.56..62.72 rows=1248 width=8) (actual time=0.028..0.809 rows=3569 loops=1) | | |
Index Cond: ((tag_value = 'customer_99'::text) AND (tag_name = 'customer'::text)) | | |
Heap Fetches: 0 | | |
-> Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t2 (cost=0.56..1.62 rows=1 width=8) (actual time=0.005..0.005 rows=0 loops=3569) | | |
Index Cond: ((tag_value = 'PEREKRESTOK'::text) AND (tag_name = 'merchant'::text) AND (operation_id = t3.operation_id)) | | |
Heap Fetches: 0 | | |
-> Index Scan using tst_operation_pkey on tst_operation op (cost=0.43..0.54 rows=1 width=14) (actual time=0.004..0.004 rows=0 loops=494) | | |
Index Cond: (id = t2.operation_id) | | |
Filter: (((date)::timestamp without time zone >= '2023-03-01 00:00:00'::timestamp without time zone) AND ((date)::timestamp without time zone <= '2023-05-31 00:00:00'::timestamp without time zone))| | |
Rows Removed by Filter: 1 | | |
-> Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t1 (cost=0.56..0.72 rows=1 width=8) (actual time=0.006..0.006 rows=0 loops=115) | | |
Index Cond: ((tag_value = 'TTK'::text) AND (tag_name = 'provider'::text) AND (operation_id = op.id)) | | |
Heap Fetches: 0 | | |
Planning Time: 0.695 ms | | |
Execution Time: 21.738 ms | | |
*/ | |
explain analyze | |
select count(*), sum(op.amt) | |
from tst_operation op | |
inner join tst_operation_tag t1 on t1.operation_id = op.id | |
where t1.tag_name = 'provider' | |
and t1.tag_value = 'TTK' | |
and op.date between '2023-03-01' and '2023-05-31'; | |
/* | |
count |sum | | |
------+------------+ | |
306115|152849546.80| | |
*/ | |
/* | |
QUERY PLAN | | |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | |
Finalize Aggregate (cost=98515.57..98515.60 rows=1 width=32) (actual time=339.046..351.796 rows=1 loops=1) | | |
-> Gather (cost=98515.32..98515.55 rows=2 width=32) (actual time=337.424..351.770 rows=3 loops=1) | | |
Workers Planned: 2 | | |
Workers Launched: 2 | | |
-> Partial Aggregate (cost=97515.32..97515.35 rows=1 width=32) (actual time=334.503..334.505 rows=1 loops=3) | | |
-> Parallel Hash Join (cost=52292.80..97206.62 rows=123481 width=6) (actual time=242.085..326.573 rows=102038 loops=3) | | |
Hash Cond: (t1.operation_id = op.id) | | |
-> Parallel Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t1 (cost=0.56..37926.67 rows=490175 width=8) (actual time=0.045..69.729 rows=405323 loops=3) | | |
Index Cond: ((tag_value = 'TTK'::text) AND (tag_name = 'provider'::text)) | | |
Heap Fetches: 0 | | |
-> Parallel Hash (cost=37969.97..37969.97 rows=383116 width=14) (actual time=117.194..117.194 rows=306667 loops=3) | | |
Buckets: 262144 Batches: 8 Memory Usage: 7488kB | | |
-> Parallel Index Scan using tst_operation_date on tst_operation op (cost=0.43..37969.97 rows=383116 width=14) (actual time=0.040..67.760 rows=306667 loops=3) | | |
Index Cond: (((date)::timestamp without time zone >= '2023-03-01 00:00:00'::timestamp without time zone) AND ((date)::timestamp without time zone <= '2023-05-31 00:00:00'::timestamp without time zone))| | |
Planning Time: 0.212 ms | | |
Execution Time: 351.835 ms | | |
*/ | |
explain analyze | |
select count(*), sum(op.amt) | |
from tst_operation op | |
inner join tst_operation_tag t1 on t1.operation_id = op.id | |
where t1.tag_name = 'merchant' | |
and t1.tag_value = 'MAGNIT' | |
and op.date between '2023-03-01' and '2023-05-31'; | |
/* | |
count |sum | | |
------+-----------+ | |
130743|65147241.83| | |
*/ | |
/* | |
QUERY PLAN | | |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | |
Finalize Aggregate (cost=63371.18..63371.21 rows=1 width=40) (actual time=219.202..230.805 rows=1 loops=1) | | |
-> Gather (cost=63370.93..63371.16 rows=2 width=40) (actual time=217.915..230.794 rows=3 loops=1) | | |
Workers Planned: 2 | | |
Workers Launched: 2 | | |
-> Partial Aggregate (cost=62370.93..62370.96 rows=1 width=40) (actual time=214.628..214.630 rows=1 loops=3) | | |
-> Parallel Hash Join (cost=23130.56..62105.78 rows=53030 width=6) (actual time=85.931..210.434 rows=43581 loops=3) | | |
Hash Cond: (op.id = t1.operation_id) | | |
-> Parallel Index Scan using tst_operation_date on tst_operation op (cost=0.43..37969.97 rows=383116 width=14) (actual time=0.042..51.422 rows=306667 loops=3) | | |
Index Cond: (((date)::timestamp without time zone >= '2023-03-01 00:00:00'::timestamp without time zone) AND ((date)::timestamp without time zone <= '2023-05-31 00:00:00'::timestamp without time zone))| | |
-> Parallel Hash (cost=16288.56..16288.56 rows=210510 width=8) (actual time=84.675..84.676 rows=173468 loops=3) | | |
Buckets: 524288 Batches: 1 Memory Usage: 24480kB | | |
-> Parallel Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t1 (cost=0.56..16288.56 rows=210510 width=8) (actual time=0.041..45.999 rows=173468 loops=3) | | |
Index Cond: ((tag_value = 'MAGNIT'::text) AND (tag_name = 'merchant'::text)) | | |
Heap Fetches: 0 | | |
Planning Time: 0.187 ms | | |
Execution Time: 230.846 ms | | |
*/ | |
explain analyze | |
select count(*), sum(op.amt) | |
from tst_operation op | |
inner join tst_operation_tag t1 on t1.operation_id = op.id | |
where t1.tag_name = 'customer' | |
and t1.tag_value = 'customer_99' | |
and op.date between '2023-03-01' and '2023-05-31'; | |
/* | |
count|sum | | |
-----+---------+ | |
898|431805.31| | |
*/ | |
/* | |
QUERY PLAN | | |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | |
Aggregate (cost=3092.79..3092.82 rows=1 width=40) (actual time=11.638..11.639 rows=1 loops=1) | | |
-> Nested Loop (cost=0.99..3091.22 rows=314 width=6) (actual time=1.837..11.514 rows=898 loops=1) | | |
-> Index Only Scan using tst_operation_tag_pkey on tst_operation_tag t1 (cost=0.56..62.72 rows=1248 width=8) (actual time=0.022..0.739 rows=3569 loops=1) | | |
Index Cond: ((tag_value = 'customer_99'::text) AND (tag_name = 'customer'::text)) | | |
Heap Fetches: 0 | | |
-> Index Scan using tst_operation_pkey on tst_operation op (cost=0.43..2.43 rows=1 width=14) (actual time=0.003..0.003 rows=0 loops=3569) | | |
Index Cond: (id = t1.operation_id) | | |
Filter: (((date)::timestamp without time zone >= '2023-03-01 00:00:00'::timestamp without time zone) AND ((date)::timestamp without time zone <= '2023-05-31 00:00:00'::timestamp without time zone))| | |
Rows Removed by Filter: 1 | | |
Planning Time: 0.189 ms | | |
Execution Time: 11.673 ms | | |
*/ | |
explain analyze | |
select count(*), sum(op.amt) | |
from tst_operation op | |
inner join tst_operation_tag t1 on t1.operation_id = op.id | |
where t1.tag_value = 'customer_99' | |
and op.date between '2023-03-01' and '2023-05-31'; | |
/* | |
count|sum | | |
-----+---------+ | |
898|431805.31| | |
*/ | |
/* | |
QUERY PLAN | | |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | |
Finalize Aggregate (cost=8268.87..8268.90 rows=1 width=40) (actual time=15.829..22.043 rows=1 loops=1) | | |
-> Gather (cost=8268.62..8268.85 rows=2 width=40) (actual time=15.683..22.030 rows=3 loops=1) | | |
Workers Planned: 2 | | |
Workers Launched: 2 | | |
-> Partial Aggregate (cost=7268.62..7268.65 rows=1 width=40) (actual time=12.485..12.487 rows=1 loops=3) | | |
-> Nested Loop (cost=66.25..7266.68 rows=387 width=6) (actual time=1.765..12.405 rows=299 loops=3) | | |
-> Parallel Bitmap Heap Scan on tst_operation_tag t1 (cost=65.82..3767.94 rows=1535 width=8) (actual time=0.488..4.095 rows=1190 loops=3) | | |
Recheck Cond: (tag_value = 'customer_99'::text) | | |
Heap Blocks: exact=2227 | | |
-> Bitmap Index Scan on tst_operation_tag_pkey (cost=0.00..64.90 rows=3683 width=0) (actual time=0.834..0.834 rows=3569 loops=1) | | |
Index Cond: (tag_value = 'customer_99'::text) | | |
-> Index Scan using tst_operation_pkey on tst_operation op (cost=0.43..2.28 rows=1 width=14) (actual time=0.007..0.007 rows=0 loops=3569) | | |
Index Cond: (id = t1.operation_id) | | |
Filter: (((date)::timestamp without time zone >= '2023-03-01 00:00:00'::timestamp without time zone) AND ((date)::timestamp without time zone <= '2023-05-31 00:00:00'::timestamp without time zone))| | |
Rows Removed by Filter: 1 | | |
Planning Time: 0.190 ms | | |
Execution Time: 22.095 ms | | |
*/ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment