bityob/README.md

## README.md

      
    Raw
  

              README.md
            
          
    What

We want to have 1 row by id, but not just aggregating all values with simple array_agg,
but removing null and empty fields, removing duplicates,
and doing it for array field and 2D array field too.
Table before agg logic


Table after agg logic


## agg-2d-arrays-remove-duplications-postgres.sql
--drop table data
--truncate table data
create temp table data (id int, s text, u text, start_time timestamp);

insert into data values
(1, null, null, null),
(1, '{123, 456}', '{{111, 222, 333}, {444, 555, 666}}', '2024-02-20'),
(1, '{456, 789}', '{{111, 222, 333}}', '2024-02-21'),
(1, '{123}', '{{777, 888, 999}}', '2024-02-22'),
(1, '', '{{111, 222, 333}, {444, 555, 666}}', '2024-02-23'),
(1, '{}', '{}', '2024-02-24'),
(1, '{}', '{}', '2024-05-31'),
(2, null, null, null),
(2, '{9123, 9456}', '{{9111, 9222, 9333}, {9444, 9555, 9666}}', '2024-03-20'),
(2, '{9456, 9789}', '{{9111, 9222, 9333}}', '2024-03-21'),
(2, '{9123}', '{{9777, 9888, 9999}}', '2024-03-22'),
(2, '', '{{9111, 9222, 9333}, {9444, 9555, 9666}}', '2024-03-23'),
(2, '{}', '{}', '2024-03-29'),
(2, '{}', '{}', '2024-03-24');

select * from data;

-- Source: https://stackoverflow.com/a/8142998
CREATE OR REPLACE FUNCTION unnest_2d_1d(anyarray)
  RETURNS SETOF anyarray
  LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT AS
$func$
SELECT array_agg($1[d1][d2])
FROM   generate_subscripts($1,1) d1
    ,  generate_subscripts($1,2) d2
GROUP  BY d1
ORDER  BY d1
$func$;

with step1 as (
	select id,
		-- Ignore field if is null or empty
		array_agg(s) filter (where s is not null and s<>'' and s<>'{}') as s,
		array_agg(u) filter (where u is not null and u<>'' and u<>'{}') as u,
		max(start_time) as start_time
	from data
	group by id
),
step2 as (
	select id, array_agg(s) as s
	from
	(
		-- Remove duplications
		select distinct id, unnest(unnest(s)::varchar[]) as s
		from step1
	) as step2_internal
	group by id
),
step3 as (
	select id, array_agg(u) as u
	from
	(
		-- Remove duplications
		select distinct id, unnest_2d_1d(unnest(u::varchar[])::varchar[]) as u
		from step1
	) as step3_internal
	group by id
)
select step1.id,
	step1.start_time,
	--step1.s as orig_s,
	step2.s,
	--step1.u as orig_u,
	step3.u
from step1
join step2 on step1.id=step2.id
join step3 on step1.id=step3.id;