Skip to content

Instantly share code, notes, and snippets.

@bityob
Last active February 28, 2024 22:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bityob/181656ae4c3c7f22155523dcfef579ed to your computer and use it in GitHub Desktop.
Save bityob/181656ae4c3c7f22155523dcfef579ed to your computer and use it in GitHub Desktop.
Postgres - Aggregate multiple rows to 1 row with removing duplicates, null, empty fields on text, array and 2D array types

What

We want to have 1 row by id, but not just aggregating all values with simple array_agg, but removing null and empty fields, removing duplicates, and doing it for array field and 2D array field too.

Table before agg logic

image

Table after agg logic

image

--drop table data
--truncate table data
create temp table data (id int, s text, u text, start_time timestamp);
insert into data values
(1, null, null, null),
(1, '{123, 456}', '{{111, 222, 333}, {444, 555, 666}}', '2024-02-20'),
(1, '{456, 789}', '{{111, 222, 333}}', '2024-02-21'),
(1, '{123}', '{{777, 888, 999}}', '2024-02-22'),
(1, '', '{{111, 222, 333}, {444, 555, 666}}', '2024-02-23'),
(1, '{}', '{}', '2024-02-24'),
(1, '{}', '{}', '2024-05-31'),
(2, null, null, null),
(2, '{9123, 9456}', '{{9111, 9222, 9333}, {9444, 9555, 9666}}', '2024-03-20'),
(2, '{9456, 9789}', '{{9111, 9222, 9333}}', '2024-03-21'),
(2, '{9123}', '{{9777, 9888, 9999}}', '2024-03-22'),
(2, '', '{{9111, 9222, 9333}, {9444, 9555, 9666}}', '2024-03-23'),
(2, '{}', '{}', '2024-03-29'),
(2, '{}', '{}', '2024-03-24');
select * from data;
-- Source: https://stackoverflow.com/a/8142998
CREATE OR REPLACE FUNCTION unnest_2d_1d(anyarray)
RETURNS SETOF anyarray
LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT AS
$func$
SELECT array_agg($1[d1][d2])
FROM generate_subscripts($1,1) d1
, generate_subscripts($1,2) d2
GROUP BY d1
ORDER BY d1
$func$;
with step1 as (
select id,
-- Ignore field if is null or empty
array_agg(s) filter (where s is not null and s<>'' and s<>'{}') as s,
array_agg(u) filter (where u is not null and u<>'' and u<>'{}') as u,
max(start_time) as start_time
from data
group by id
),
step2 as (
select id, array_agg(s) as s
from
(
-- Remove duplications
select distinct id, unnest(unnest(s)::varchar[]) as s
from step1
) as step2_internal
group by id
),
step3 as (
select id, array_agg(u) as u
from
(
-- Remove duplications
select distinct id, unnest_2d_1d(unnest(u::varchar[])::varchar[]) as u
from step1
) as step3_internal
group by id
)
select step1.id,
step1.start_time,
--step1.s as orig_s,
step2.s,
--step1.u as orig_u,
step3.u
from step1
join step2 on step1.id=step2.id
join step3 on step1.id=step3.id;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment