Skip to content

Instantly share code, notes, and snippets.

@gullevek
Last active May 8, 2018 02:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gullevek/81b9d24039714af3726f4597e88d4223 to your computer and use it in GitHub Desktop.
Save gullevek/81b9d24039714af3726f4597e88d4223 to your computer and use it in GitHub Desktop.
PostgreSQL function to monitor progress for select -> insert flow
CREATE OR REPLACE FUNCTION copy_progress(from_date DATE, to_date DATE, p_precision INT DEFAULT 1) RETURNS "varchar"
AS $$
DECLARE
status VARCHAR; -- return status
my_rec RECORD; -- transfer record
-- progress info
pos INT := 1; -- current position
row_count INT := 0; -- overall row count (max)
percent NUMERIC; -- output percent
old_percent NUMERIC := 0.0; -- previous percent
percent_precision INT := 1; -- default precision, use function parameter to override
-- to char formats
percent_precision_format VARCHAR := '990D9';
-- count_format VARCHAR := '9G999G999G999'; -- fixed (no padding of empty space on the left)
count_format VARCHAR;
flex_number_format VARCHAR := 'FM9G999G999G999'; -- flex (will omit leading zeros)
time_format VARCHAR := 'FMHH24"h" FMMI"m" FMSS"s" US"ms"';
-- below is for time
start_all TIMESTAMP := timeofday(); -- overall start time
start_ts TIMESTAMP; -- start time after query is run (select)
start_time TIMESTAMP; -- start time of one segment
end_time TIMESTAMP; -- any end time for segment, overall run, etc
full_time_needed INTERVAL := '1 seconds'; -- the full time for the insert
overall_time_needed INTERVAL := '1 seconds'; -- the overall run time including select
query_time_needed INTERVAL := '1 seconds'; -- the select query time
last_group INTERVAL; -- last group (percent set) time
lines_processed INT; -- last group (percent set) rows
current_pos INT := 1; -- current position
full_time_per_line INTERVAL; -- overall time per lines inserted
eta INTERVAL; -- estimated finish time
data_per_sec NUMERIC; -- transfer per second
last_run_time NUMERIC; -- last run time for segment
BEGIN
-- override percent precision, if too large, fallback to default 100.0% format
IF p_precision IS NOT NULL AND p_precision >= 0 AND p_precision <= 3 THEN
percent_precision := p_precision;
-- set new output precision based on the value
percent_precision_format := '990';
IF p_precision > 0 THEN
percent_precision_format := percent_precision_format || 'D' || repeat('9', p_precision);
END IF;
END IF;
-- set output format for pos/row count (based on count format)
FOR my_rec IN
-- READ QUERY HERE
SELECT COUNT(*) OVER () AS ecount, ROW_NUMBER() OVER () AS epos,
[bar] -- HERE NEEDS TO BE THE ROWS YOU WANT TO READ
FROM [bar] -- TABLE YOU WANT TO READ
-- NEEDS DATE
WHERE [date] >= from_date AND [date] < to_date
LOOP
-- set start time if not set yet, so we get start time AFTER the query run
IF start_time IS NULL THEN
start_time := timeofday();
start_ts := start_time;
-- store query run time
query_time_needed := start_ts - start_all;
END IF;
-- set current position and max row count
pos := my_rec.epos;
row_count := my_rec.ecount;
-- fixed pos/row_count string length based on row_count
IF count_format IS NULL THEN
-- strip FM from flex_number_format (1), get length of row count (2), add G number spacers (for every three numbers) (3)
-- for every three steps there is a thousand seperator
count_format := RIGHT(REPLACE(flex_number_format, 'FM', ''), LENGTH(row_count::VARCHAR) + ((LENGTH(row_count::VARCHAR) - 1) / 3));
END IF;
-- INSERT QUERY HERE
INSERT INTO [foo] ([bar], [date]) VALUES (my_rec.[bar], ..., my_rec.[date]);
-- percent calculate
percent := ROUND((pos::NUMERIC / row_count::NUMERIC) * 100::NUMERIC, percent_precision);
-- if percent is different to previous one, run all the time calculations
IF percent <> old_percent THEN
-- get the end time for this segment
end_time := timeofday(); -- calc the run time since call
full_time_needed := end_time - start_ts; -- calc the run time for the last segment
last_group := end_time - start_time;
-- lines processed in the last run
lines_processed := pos - current_pos;
-- time needed in average for each row based on complete run time
full_time_per_line := full_time_needed / pos;
-- time to finish
eta := full_time_per_line * (row_count - pos);
-- fix possible unset eta
IF eta < '0 seconds' THEN
eta := '0 seconds';
END IF;
-- for data per seconds output
EXECUTE 'SELECT EXTRACT(EPOCH FROM INTERVAL ''' || full_time_needed || ''')::numeric' INTO last_run_time;
IF last_run_time <> 0 THEN
data_per_sec := ROUND(pos::numeric / last_run_time, 2)::numeric;
ELSE
data_per_sec := 0::numeric;
END IF;
-- progress output
RAISE NOTICE '[% ~ %] Processed% /% rows |% %% | ETA: %s / TR: %s / % rows/s / LR: % rows in %s',
from_date, to_date,
to_char(pos, count_format),
to_char(row_count, count_format),
to_char(percent, percent_precision_format),
to_char(eta, time_format),
to_char(full_time_needed, time_format),
to_char(data_per_sec, flex_number_format),
to_char(lines_processed, flex_number_format),
to_char(last_group, time_format);
-- next step keep
old_percent := percent;
current_pos := pos;
-- new start time for next segment
start_time := timeofday();
END IF;
END LOOP;
-- end time calculations
end_time := timeofday();
overall_time_needed := end_time - start_all;
full_time_needed := end_time - start_ts;
RAISE NOTICE 'COUNT FORMAT: %', count_format;
-- finished status
status := '[' || from_date || ' ~ ' || to_date || '] Copied ' || to_char(pos, flex_number_format) || ' rows in ' || to_char(full_time_needed, time_format) || ' and overall time ' || to_char(overall_time_needed, time_format) || ', Query time: ' || to_char(query_time_needed, time_format);
RETURN status;
END;
$$
LANGUAGE plpgsql;
@gullevek
Copy link
Author

Shows progress of a large data copy

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment