steve-chavez/relational-insert.sql Secret

## relational-insert.sql
-- PROPOSAL FOR GENERATED QUERIES FOR RELATIONAL INSERTS

-- Since we don't necessarily know the ids of the inserted child records..
-- We're going to rely on the fact that INSERT .. SELECT .. RETURNING preserves the order of the inserted records
-- As stated in:
-- https://stackoverflow.com/questions/5439293/is-insert-returning-guaranteed-to-return-things-in-the-right-order/27996203#27996203
-- https://www.mail-archive.com/pgsql-hackers@postgresql.org/msg253739.html

-- Also confirmed on IRC:
-- <steve-chavez> Hello. Is the order of the INSERTed rows preserved when using RETURNING?
-- <steve-chavez> I bumped into this https://www.mail-archive.com/pgsql-hackers@postgresql.org/msg253739.html. I'd like to double check.
-- <lluad> I would imagine that making promises about the order would complicate any future work on parallelizing inserts.
-- <steve-chavez> lluad: Thank you. But for now I guess it's safe to assume the order is preserved right?
-- <lluad> I've assumed that before. It seems like a reasonable assumption still.
-- <RhodiumToad> steve-chavez: as far as I know, in the current code the order won't be changed
--
-- We'll need to monitor changes in future versions in case this breaks, but for now this works.
-- Check at the bottom for a test that ensures this.

-- Using these tables
CREATE TABLE items (
  id serial PRIMARY KEY,
  name text NOT NULL
);

CREATE TABLE subitems (
  id serial PRIMARY KEY,
  name text NOT NULL,
  item_id integer not null REFERENCES items(id)
);

CREATE TABLE pieces (
  id serial PRIMARY KEY,
  name text NOT NULL,
  subitem_id integer not null REFERENCES subitems(id)
);

CREATE TABLE parts (
  id serial PRIMARY KEY,
  name text,
  item_id integer not null REFERENCES items(id)
);

-- We would produce these kind of queries for relational inserts

-- inserting items + subitems + pieces
-- No need to look at the payload in haskell code, we can get the info of what the user wants to insert from the querystring
-- POST /items?columns=name,subitems(name,pieces(name))
-- If the payload contains other attributes that are not specified in the querystring, they'll be ignored
WITH
payload AS (
  select $$
    [
      {"name": "item 1", "subitems": [
        {"name": "subitem 1", "pieces": [{"name": "piece 1"}, {"name": "piece 2"}, {"name": "piece 3"}]},
        {"name": "subitem 2", "pieces": [{"name": "piece 4"}, {"name": "piece 5"}, {"name": "piece 6"}]},
        {"name": "subitem 3", "pieces": [{"name": "piece 7"}, {"name": "piece 8"}]}]},
      {"name": "item 2", "subitems": [
        {"name": "subitem 4", "pieces": [{"name": "piece 9"}, {"name": "piece 10"}]},
        {"name": "subitem 5", "pieces": [{"name": "piece 11"}]}]}
    ]
  $$::json as data
),
-- get all the first level records, preserving the rest of the levels
p1 AS (
  select
    ordinality,
    name,
    subitems
  -- Here for json_to_recordset to work we need to know the types of the table columns.
  -- So, the schema cache would be queried
  from rows from (json_to_recordset((select data from payload)) as (name text, subitems json)) with ordinality
),
-- insert first level, order to make sure we preserve the order of the original payload
ins1 as (
  insert into items(name)
  select name
  from p1
  order by ordinality
  returning *
),
-- We need this kind of aux ctes because there's no RETURNING WITH ORDINALITY
aux1 as (
  select
    row_number() over () as ordinality,
    ins1.id
  from ins1
),
-- here we join to get the inserted ids of the previous level while associating the original payload 2nd level attributes
p2 as (
  select
    row_number() over () as ordinality,
    aux1.id as item_id,
    x.name,
    x.pieces
  from aux1
  join p1
    on p1.ordinality = aux1.ordinality
  join lateral(
    select *
    from json_to_recordset(p1.subitems) as (name text, pieces json)
  ) x on true
),
-- insert 2nd level, rest of ctes maintain the same idea as the previous ones
ins2 as (
  insert into subitems(name, item_id)
  select
    name,
    item_id
  from p2
  order by ordinality
  returning *
),
aux2 as (
  select
    row_number() over () as ordinality,
    ins2.id
  from ins2
),
p3 as (
  select
    x.name as piece_name,
    aux2.id as subitem_id
  from aux2
  join p2
    on p2.ordinality = aux2.ordinality
  join lateral(
    select *
    from json_to_recordset(p2.pieces) as (name text)
  ) x on true
),
ins3 as (
  insert into pieces(name, subitem_id)
  select * from p3
  returning *
)
select * from ins3;


-- inserting items + 2childs = parts + subitems
-- POST /items?columns=name,subitems(name),parts(name)
-- same idea as previous query
WITH
payload AS (
  select $$
    [
      { "name": "item 1",
        "subitems": [
          {"name": "subitem 1"},
          {"name": "subitem 2"},
          {"name": "subitem 3"}
        ],
        "parts": [
          {"name": "part 1"},
          {"name": "part 2"}
        ]
      },
      { "name": "item 2",
        "subitems": [
          {"name": "subitem 4"},
          {"name": "subitem 5"}
        ],
        "parts": [
          {"name": "part 3"},
          {"name": "part 4"}
        ]
      }
    ]
  $$::json as data
),
p1 as (
  select
    ordinality,
    name,
    subitems,
    parts
  from rows from (json_to_recordset((select data from payload)) as (name text, subitems json, parts json)) with ordinality
),
ins1 as (
  insert into items(name)
  select name
  from p1
  order by ordinality
  returning *
),
aux1 as (
  select
    row_number() over () as ordinality,
    ins1.id
  from ins1
),
p2_1 as (
  select
    row_number() over () as ordinality,
    aux1.id as item_id,
    x.name
  from aux1
  join p1
    on p1.ordinality = aux1.ordinality
  join lateral(
    select *
    from json_to_recordset(p1.subitems) as (name text)
  ) x on true
),
p2_2 as (
  select
    row_number() over () as ordinality,
    aux1.id as item_id,
    x.name
  from aux1
  join p1
    on p1.ordinality = aux1.ordinality
  join lateral(
    select *
    from json_to_recordset(p1.parts) as (name text)
  ) x on true
),
ins2_1 as (
  insert into subitems(name, item_id)
  select
    name,
    item_id
  from p2_1
  order by ordinality
  returning *
),
ins2_2 as (
  insert into parts(name, item_id)
  select
    name,
    item_id
  from p2_2
  order by ordinality
  returning *
)
select * from ins2_2;

-- insert a million rows to ensure returning returns rows in order
-- Basically we generate a million item-x paired with subitem-x, x being in {1,2,3...1000000)
-- We insert the item-x first then get the generated pks and join with the original payload to get subitem-x
-- If all the rows have item-x subitem-y where x = y then the order of the inserts is preserved
WITH
payload AS (
  select 'item-' || x as item_name, 'subitem-' || x as subitem_name from generate_series(1, 1000000) x
),
p1 as (
  select
    row_number() over () as ordinality,
    item_name,
    subitem_name
  from payload
),
ins1 as (
  insert into items(name)
  select item_name
  from p1
  order by ordinality
  returning *
),
aux1 as (
  select
    row_number() over () as ordinality,
    ins1.*
  from ins1
),
check_all as (
  select
    aux1.id as item_id,
    aux1.name as item_name,
    p1.subitem_name
  from aux1
  join p1
    on p1.ordinality = aux1.ordinality
  where
    split_part(item_name, '-', 2) = split_part(subitem_name, '-', 2))
select count(*) = 1000000 from check_all;