Skip to content

Instantly share code, notes, and snippets.

@steve-chavez
Last active October 1, 2024 20:02
Show Gist options
  • Save steve-chavez/60473d1765b5175012f5cc15695ae0b1 to your computer and use it in GitHub Desktop.
Save steve-chavez/60473d1765b5175012f5cc15695ae0b1 to your computer and use it in GitHub Desktop.
-- PROPOSAL FOR GENERATED QUERIES FOR RELATIONAL INSERTS
-- Since we don't necessarily know the ids of the inserted child records..
-- We're going to rely on the fact that INSERT .. SELECT .. RETURNING preserves the order of the inserted records
-- As stated in:
-- https://stackoverflow.com/questions/5439293/is-insert-returning-guaranteed-to-return-things-in-the-right-order/27996203#27996203
-- https://www.mail-archive.com/pgsql-hackers@postgresql.org/msg253739.html
-- Also confirmed on IRC:
-- <steve-chavez> Hello. Is the order of the INSERTed rows preserved when using RETURNING?
-- <steve-chavez> I bumped into this https://www.mail-archive.com/pgsql-hackers@postgresql.org/msg253739.html. I'd like to double check.
-- <lluad> I would imagine that making promises about the order would complicate any future work on parallelizing inserts.
-- <steve-chavez> lluad: Thank you. But for now I guess it's safe to assume the order is preserved right?
-- <lluad> I've assumed that before. It seems like a reasonable assumption still.
-- <RhodiumToad> steve-chavez: as far as I know, in the current code the order won't be changed
--
-- We'll need to monitor changes in future versions in case this breaks, but for now this works.
-- Check at the bottom for a test that ensures this.
-- Using these tables
CREATE TABLE items (
id serial PRIMARY KEY,
name text NOT NULL
);
CREATE TABLE subitems (
id serial PRIMARY KEY,
name text NOT NULL,
item_id integer not null REFERENCES items(id)
);
CREATE TABLE pieces (
id serial PRIMARY KEY,
name text NOT NULL,
subitem_id integer not null REFERENCES subitems(id)
);
CREATE TABLE parts (
id serial PRIMARY KEY,
name text,
item_id integer not null REFERENCES items(id)
);
-- We would produce these kind of queries for relational inserts
-- inserting items + subitems + pieces
-- No need to look at the payload in haskell code, we can get the info of what the user wants to insert from the querystring
-- POST /items?columns=name,subitems(name,pieces(name))
-- If the payload contains other attributes that are not specified in the querystring, they'll be ignored
WITH
payload AS (
select $$
[
{"name": "item 1", "subitems": [
{"name": "subitem 1", "pieces": [{"name": "piece 1"}, {"name": "piece 2"}, {"name": "piece 3"}]},
{"name": "subitem 2", "pieces": [{"name": "piece 4"}, {"name": "piece 5"}, {"name": "piece 6"}]},
{"name": "subitem 3", "pieces": [{"name": "piece 7"}, {"name": "piece 8"}]}]},
{"name": "item 2", "subitems": [
{"name": "subitem 4", "pieces": [{"name": "piece 9"}, {"name": "piece 10"}]},
{"name": "subitem 5", "pieces": [{"name": "piece 11"}]}]}
]
$$::json as data
),
-- get all the first level records, preserving the rest of the levels
p1 AS (
select
ordinality,
name,
subitems
-- Here for json_to_recordset to work we need to know the types of the table columns.
-- So, the schema cache would be queried
from rows from (json_to_recordset((select data from payload)) as (name text, subitems json)) with ordinality
),
-- insert first level, order to make sure we preserve the order of the original payload
ins1 as (
insert into items(name)
select name
from p1
order by ordinality
returning *
),
-- We need this kind of aux ctes because there's no RETURNING WITH ORDINALITY
aux1 as (
select
row_number() over () as ordinality,
ins1.id
from ins1
),
-- here we join to get the inserted ids of the previous level while associating the original payload 2nd level attributes
p2 as (
select
row_number() over () as ordinality,
aux1.id as item_id,
x.name,
x.pieces
from aux1
join p1
on p1.ordinality = aux1.ordinality
join lateral(
select *
from json_to_recordset(p1.subitems) as (name text, pieces json)
) x on true
),
-- insert 2nd level, rest of ctes maintain the same idea as the previous ones
ins2 as (
insert into subitems(name, item_id)
select
name,
item_id
from p2
order by ordinality
returning *
),
aux2 as (
select
row_number() over () as ordinality,
ins2.id
from ins2
),
p3 as (
select
x.name as piece_name,
aux2.id as subitem_id
from aux2
join p2
on p2.ordinality = aux2.ordinality
join lateral(
select *
from json_to_recordset(p2.pieces) as (name text)
) x on true
),
ins3 as (
insert into pieces(name, subitem_id)
select * from p3
returning *
)
select * from ins3;
-- inserting items + 2childs = parts + subitems
-- POST /items?columns=name,subitems(name),parts(name)
-- same idea as previous query
WITH
payload AS (
select $$
[
{ "name": "item 1",
"subitems": [
{"name": "subitem 1"},
{"name": "subitem 2"},
{"name": "subitem 3"}
],
"parts": [
{"name": "part 1"},
{"name": "part 2"}
]
},
{ "name": "item 2",
"subitems": [
{"name": "subitem 4"},
{"name": "subitem 5"}
],
"parts": [
{"name": "part 3"},
{"name": "part 4"}
]
}
]
$$::json as data
),
p1 as (
select
ordinality,
name,
subitems,
parts
from rows from (json_to_recordset((select data from payload)) as (name text, subitems json, parts json)) with ordinality
),
ins1 as (
insert into items(name)
select name
from p1
order by ordinality
returning *
),
aux1 as (
select
row_number() over () as ordinality,
ins1.id
from ins1
),
p2_1 as (
select
row_number() over () as ordinality,
aux1.id as item_id,
x.name
from aux1
join p1
on p1.ordinality = aux1.ordinality
join lateral(
select *
from json_to_recordset(p1.subitems) as (name text)
) x on true
),
p2_2 as (
select
row_number() over () as ordinality,
aux1.id as item_id,
x.name
from aux1
join p1
on p1.ordinality = aux1.ordinality
join lateral(
select *
from json_to_recordset(p1.parts) as (name text)
) x on true
),
ins2_1 as (
insert into subitems(name, item_id)
select
name,
item_id
from p2_1
order by ordinality
returning *
),
ins2_2 as (
insert into parts(name, item_id)
select
name,
item_id
from p2_2
order by ordinality
returning *
)
select * from ins2_2;
-- insert a million rows to ensure returning returns rows in order
-- Basically we generate a million item-x paired with subitem-x, x being in {1,2,3...1000000)
-- We insert the item-x first then get the generated pks and join with the original payload to get subitem-x
-- If all the rows have item-x subitem-y where x = y then the order of the inserts is preserved
WITH
payload AS (
select 'item-' || x as item_name, 'subitem-' || x as subitem_name from generate_series(1, 1000000) x
),
p1 as (
select
row_number() over () as ordinality,
item_name,
subitem_name
from payload
),
ins1 as (
insert into items(name)
select item_name
from p1
order by ordinality
returning *
),
aux1 as (
select
row_number() over () as ordinality,
ins1.*
from ins1
),
check_all as (
select
aux1.id as item_id,
aux1.name as item_name,
p1.subitem_name
from aux1
join p1
on p1.ordinality = aux1.ordinality
where
split_part(item_name, '-', 2) = split_part(subitem_name, '-', 2))
select count(*) = 1000000 from check_all;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment