gullevek/progress_data_import.sql

## progress_data_import.sql
CREATE OR REPLACE FUNCTION copy_progress(from_date DATE, to_date DATE, p_precision INT DEFAULT 1) RETURNS "varchar"
AS $$
    DECLARE
        status VARCHAR; -- return status
        my_rec RECORD; -- transfer record
        -- progress info
        pos INT := 1; -- current position
        row_count INT := 0; -- overall row count (max)
        percent NUMERIC; -- output percent
        old_percent NUMERIC := 0.0; -- previous percent
        percent_precision INT := 1; -- default precision, use function parameter to override
        -- to char formats
        percent_precision_format VARCHAR := '990D9';
--      count_format VARCHAR := '9G999G999G999'; -- fixed (no padding of empty space on the left)
        count_format VARCHAR;
        flex_number_format VARCHAR := 'FM9G999G999G999'; -- flex (will omit leading zeros)
        time_format VARCHAR := 'FMHH24"h" FMMI"m" FMSS"s" US"ms"';
        -- below is for time
        start_all TIMESTAMP := timeofday(); -- overall start time
        start_ts TIMESTAMP; -- start time after query is run (select)
        start_time TIMESTAMP; -- start time of one segment
        end_time TIMESTAMP; -- any end time for segment, overall run, etc
        full_time_needed INTERVAL := '1 seconds'; -- the full time for the insert
        overall_time_needed INTERVAL := '1 seconds'; -- the overall run time including select
        query_time_needed INTERVAL := '1 seconds'; -- the select query time
        last_group INTERVAL; -- last group (percent set) time
        lines_processed INT; -- last group (percent set) rows
        current_pos INT := 1; -- current position
        full_time_per_line INTERVAL; -- overall time per lines inserted
        eta INTERVAL; -- estimated finish time
        data_per_sec NUMERIC; -- transfer per second
        last_run_time NUMERIC; -- last run time for segment
    BEGIN
        -- override percent precision, if too large, fallback to default 100.0% format
        IF p_precision IS NOT NULL AND p_precision >= 0 AND p_precision <= 3 THEN
            percent_precision := p_precision;
            -- set new output precision based on the value
            percent_precision_format := '990';
            IF p_precision > 0 THEN
                percent_precision_format := percent_precision_format || 'D' || repeat('9', p_precision);
            END IF;
        END IF;
        -- set output format for pos/row count (based on count format)
        FOR my_rec IN
            -- READ QUERY HERE
            SELECT COUNT(*) OVER () AS ecount, ROW_NUMBER() OVER () AS epos,
            [bar] -- HERE NEEDS TO BE THE ROWS YOU WANT TO READ
            FROM [bar] -- TABLE YOU WANT TO READ
            -- NEEDS DATE
            WHERE [date] >= from_date AND [date] < to_date
        LOOP
            -- set start time if not set yet, so we get start time AFTER the query run
            IF start_time IS NULL THEN
                start_time := timeofday();
                start_ts := start_time;
                -- store query run time
                query_time_needed := start_ts - start_all;
            END IF;
            -- set current position and max row count
            pos := my_rec.epos;
            row_count := my_rec.ecount;
            -- fixed pos/row_count string length based on row_count
            IF count_format IS NULL THEN
                -- strip FM from flex_number_format (1), get length of row count (2), add G number spacers (for every three numbers) (3)
                -- for every three steps there is a thousand seperator
                count_format := RIGHT(REPLACE(flex_number_format, 'FM', ''), LENGTH(row_count::VARCHAR) + ((LENGTH(row_count::VARCHAR) - 1) / 3));
            END IF;

            -- INSERT QUERY HERE
            INSERT INTO [foo] ([bar], [date]) VALUES (my_rec.[bar], ..., my_rec.[date]);

            -- percent calculate
            percent := ROUND((pos::NUMERIC / row_count::NUMERIC) * 100::NUMERIC, percent_precision);
            -- if percent is different to previous one, run all the time calculations
            IF percent <> old_percent THEN
                -- get the end time for this segment
                end_time := timeofday();                                                                                                                              -- calc the run time since call
                full_time_needed := end_time - start_ts;                                                                                                              -- calc the run time for the last segment
                last_group := end_time - start_time;
                -- lines processed in the last run
                lines_processed := pos - current_pos;
                -- time needed in average for each row based on complete run time
                full_time_per_line := full_time_needed / pos;
                -- time to finish
                eta := full_time_per_line * (row_count - pos);
                -- fix possible unset eta
                IF eta < '0 seconds' THEN
                    eta := '0 seconds';
                END IF;
                -- for data per seconds output
                EXECUTE 'SELECT EXTRACT(EPOCH FROM INTERVAL ''' || full_time_needed || ''')::numeric' INTO last_run_time;
                IF last_run_time <> 0 THEN
                    data_per_sec := ROUND(pos::numeric / last_run_time, 2)::numeric;
                ELSE
                    data_per_sec := 0::numeric;
                END IF;
                -- progress output
                RAISE NOTICE '[% ~ %] Processed% /% rows |% %% | ETA: %s / TR: %s / % rows/s / LR: % rows in %s',
                    from_date, to_date,
                    to_char(pos, count_format),
                    to_char(row_count, count_format),
                    to_char(percent, percent_precision_format),
                    to_char(eta, time_format),
                    to_char(full_time_needed, time_format),
                    to_char(data_per_sec, flex_number_format),
                    to_char(lines_processed, flex_number_format),
                    to_char(last_group, time_format);
                -- next step keep
                old_percent := percent;
                current_pos := pos;
                -- new start time for next segment
                start_time := timeofday();
            END IF;
        END LOOP;
        -- end time calculations
        end_time := timeofday();
        overall_time_needed := end_time - start_all;
        full_time_needed := end_time - start_ts;
        RAISE NOTICE 'COUNT FORMAT: %', count_format;
        -- finished status
        status := '[' || from_date || ' ~ ' || to_date || '] Copied ' || to_char(pos, flex_number_format) || ' rows in ' || to_char(full_time_needed, time_format) || ' and overall time ' || to_char(overall_time_needed, time_format) || ', Query time: ' || to_char(query_time_needed, time_format);
        RETURN status;
    END;
$$
    LANGUAGE plpgsql;
	CREATE OR REPLACE FUNCTION copy_progress(from_date DATE, to_date DATE, p_precision INT DEFAULT 1) RETURNS "varchar"
	AS $$
	DECLARE
	status VARCHAR; -- return status
	my_rec RECORD; -- transfer record
	-- progress info
	pos INT := 1; -- current position
	row_count INT := 0; -- overall row count (max)
	percent NUMERIC; -- output percent
	old_percent NUMERIC := 0.0; -- previous percent
	percent_precision INT := 1; -- default precision, use function parameter to override
	-- to char formats
	percent_precision_format VARCHAR := '990D9';
	-- count_format VARCHAR := '9G999G999G999'; -- fixed (no padding of empty space on the left)
	count_format VARCHAR;
	flex_number_format VARCHAR := 'FM9G999G999G999'; -- flex (will omit leading zeros)
	time_format VARCHAR := 'FMHH24"h" FMMI"m" FMSS"s" US"ms"';
	-- below is for time
	start_all TIMESTAMP := timeofday(); -- overall start time
	start_ts TIMESTAMP; -- start time after query is run (select)
	start_time TIMESTAMP; -- start time of one segment
	end_time TIMESTAMP; -- any end time for segment, overall run, etc
	full_time_needed INTERVAL := '1 seconds'; -- the full time for the insert
	overall_time_needed INTERVAL := '1 seconds'; -- the overall run time including select
	query_time_needed INTERVAL := '1 seconds'; -- the select query time
	last_group INTERVAL; -- last group (percent set) time
	lines_processed INT; -- last group (percent set) rows
	current_pos INT := 1; -- current position
	full_time_per_line INTERVAL; -- overall time per lines inserted
	eta INTERVAL; -- estimated finish time
	data_per_sec NUMERIC; -- transfer per second
	last_run_time NUMERIC; -- last run time for segment
	BEGIN
	-- override percent precision, if too large, fallback to default 100.0% format
	IF p_precision IS NOT NULL AND p_precision >= 0 AND p_precision <= 3 THEN
	percent_precision := p_precision;
	-- set new output precision based on the value
	percent_precision_format := '990';
	IF p_precision > 0 THEN
	percent_precision_format := percent_precision_format \|\| 'D' \|\| repeat('9', p_precision);
	END IF;
	END IF;
	-- set output format for pos/row count (based on count format)
	FOR my_rec IN
	-- READ QUERY HERE
	SELECT COUNT(*) OVER () AS ecount, ROW_NUMBER() OVER () AS epos,
	[bar] -- HERE NEEDS TO BE THE ROWS YOU WANT TO READ
	FROM [bar] -- TABLE YOU WANT TO READ
	-- NEEDS DATE
	WHERE [date] >= from_date AND [date] < to_date
	LOOP
	-- set start time if not set yet, so we get start time AFTER the query run
	IF start_time IS NULL THEN
	start_time := timeofday();
	start_ts := start_time;
	-- store query run time
	query_time_needed := start_ts - start_all;
	END IF;
	-- set current position and max row count
	pos := my_rec.epos;
	row_count := my_rec.ecount;
	-- fixed pos/row_count string length based on row_count
	IF count_format IS NULL THEN
	-- strip FM from flex_number_format (1), get length of row count (2), add G number spacers (for every three numbers) (3)
	-- for every three steps there is a thousand seperator
	count_format := RIGHT(REPLACE(flex_number_format, 'FM', ''), LENGTH(row_count::VARCHAR) + ((LENGTH(row_count::VARCHAR) - 1) / 3));
	END IF;

	-- INSERT QUERY HERE
	INSERT INTO [foo] ([bar], [date]) VALUES (my_rec.[bar], ..., my_rec.[date]);

	-- percent calculate
	percent := ROUND((pos::NUMERIC / row_count::NUMERIC) * 100::NUMERIC, percent_precision);
	-- if percent is different to previous one, run all the time calculations
	IF percent <> old_percent THEN
	-- get the end time for this segment
	end_time := timeofday(); -- calc the run time since call
	full_time_needed := end_time - start_ts; -- calc the run time for the last segment
	last_group := end_time - start_time;
	-- lines processed in the last run
	lines_processed := pos - current_pos;
	-- time needed in average for each row based on complete run time
	full_time_per_line := full_time_needed / pos;
	-- time to finish
	eta := full_time_per_line * (row_count - pos);
	-- fix possible unset eta
	IF eta < '0 seconds' THEN
	eta := '0 seconds';
	END IF;
	-- for data per seconds output
	EXECUTE 'SELECT EXTRACT(EPOCH FROM INTERVAL ''' \|\| full_time_needed \|\| ''')::numeric' INTO last_run_time;
	IF last_run_time <> 0 THEN
	data_per_sec := ROUND(pos::numeric / last_run_time, 2)::numeric;
	ELSE
	data_per_sec := 0::numeric;
	END IF;
	-- progress output
	RAISE NOTICE '[% ~ %] Processed% /% rows \|% %% \| ETA: %s / TR: %s / % rows/s / LR: % rows in %s',
	from_date, to_date,
	to_char(pos, count_format),
	to_char(row_count, count_format),
	to_char(percent, percent_precision_format),
	to_char(eta, time_format),
	to_char(full_time_needed, time_format),
	to_char(data_per_sec, flex_number_format),
	to_char(lines_processed, flex_number_format),
	to_char(last_group, time_format);
	-- next step keep
	old_percent := percent;
	current_pos := pos;
	-- new start time for next segment
	start_time := timeofday();
	END IF;
	END LOOP;
	-- end time calculations
	end_time := timeofday();
	overall_time_needed := end_time - start_all;
	full_time_needed := end_time - start_ts;
	RAISE NOTICE 'COUNT FORMAT: %', count_format;
	-- finished status
	status := '[' \|\| from_date \|\| ' ~ ' \|\| to_date \|\| '] Copied ' \|\| to_char(pos, flex_number_format) \|\| ' rows in ' \|\| to_char(full_time_needed, time_format) \|\| ' and overall time ' \|\| to_char(overall_time_needed, time_format) \|\| ', Query time: ' \|\| to_char(query_time_needed, time_format);
	RETURN status;
	END;
	$$
	LANGUAGE plpgsql;