Skip to content

Instantly share code, notes, and snippets.

@Anexen
Last active October 15, 2023 20:07
Show Gist options
  • Save Anexen/e1be322070aeb0d679e3aa636edc9627 to your computer and use it in GitHub Desktop.
Save Anexen/e1be322070aeb0d679e3aa636edc9627 to your computer and use it in GitHub Desktop.
PostgreSQL procedure to update large table in batches
-- usage:
-- CALL update_table_in_batches('table_1', array['a = 1'], batch_size := 10);
-- CALL update_table_in_batches('table_2', array['b = 2'], track_by := 'created', initial_value := '2021-01-01'::timestamp);
CREATE OR REPLACE PROCEDURE update_table_in_batches(
target_table text,
set_expressions text[],
track_by text default 'id',
initial_value anyelement default 0,
batch_size integer default 1000,
__last_value anyelement default null
) AS $body$
DECLARE
updated_count integer;
start_time timestamptz := clock_timestamp();
BEGIN
LOOP
SELECT initial_value INTO __last_value;
EXECUTE format($$
WITH updated_rows AS (
UPDATE %1$I
SET $$ || array_to_string(set_expressions, ',') || $$
WHERE %2$I IN (
SELECT %2$I
FROM %1$I
WHERE %2$I > %3$L
LIMIT %4$L
)
RETURNING %2$I
)
SELECT count(%2$I), max(%2$I) FROM updated_rows
$$, target_table, track_by, initial_value, batch_size)
INTO updated_count, initial_value;
COMMIT;
IF updated_count > 0 AND initial_value = __last_value THEN
RAISE EXCEPTION 'Cycle detected: some rows (%) were updated, but the tracked value has not changed (%). Try to increase the batch_size.',
updated_count,
initial_value;
END IF;
IF updated_count = 0 THEN
EXIT;
END IF;
RAISE NOTICE 'Last value: %, updated: %, time spent: %',
initial_value,
updated_count,
clock_timestamp() - start_time;
END LOOP;
END
$body$ LANGUAGE plpgsql;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment