Skip to content

Instantly share code, notes, and snippets.

@xtream1101
Created April 21, 2017 14:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xtream1101/268ce0849817be7e8e58faaf274033e8 to your computer and use it in GitHub Desktop.
Save xtream1101/268ce0849817be7e8e58faaf274033e8 to your computer and use it in GitHub Desktop.
Dynamic QA query
#1
DROP FUNCTION qa_fields(character varying,character varying);
CREATE OR REPLACE FUNCTION qa_fields(schema_name VARCHAR, tbl_name VARCHAR)
-- The table def needs to be known before the function runs, but the number of fields returned is dynamic :(
-- Also need to correct names for the fields, the types will alwyas be FLOAT
RETURNS TABLE(a FLOAT, b FLOAT, c FLOAT, d FLOAT, e FLOAT, f FLOAT, g FLOAT, h FLOAT, i FLOAT, j FLOAT, k FLOAT, l FLOAT, m FLOAT, n FLOAT, o FLOAT, p FLOAT, q FLOAT, r FLOAT, s FLOAT, t FLOAT, u FLOAT, v FLOAT, w FLOAT, x FLOAT, ts DATE) AS
$func$
DECLARE field_name VARCHAR;
DECLARE qa_query_full TEXT = '';
DECLARE qa_query_percent VARCHAR = '';
DECLARE qa_query_sum VARCHAR = '';
BEGIN
FOR field_name IN SELECT column_name
FROM information_schema.columns
WHERE table_schema = schema_name
AND table_name = tbl_name
AND data_type != 'boolean'
-- Ignore these columns (these are not from the scraped data)
AND column_name not in ('id', 'scrape_id', 'keyword', 'keyword_id')
LOOP
-- Create the 2 selects for the dynamic fields
qa_query_percent := qa_query_percent || 'x.' || field_name || '_null/x.total::float*100 as ' || field_name || '_null_percent, ';
qa_query_sum := qa_query_sum || 'SUM(CASE WHEN ' || field_name || ' IS NULL THEN 1 ELSE 0 END) AS ' || field_name || '_null, ';
END LOOP;
-- Create the completed query
qa_query_full := 'SELECT ' || qa_query_percent || ' x.time_collected
FROM (SELECT
count(id) as total,
' || qa_query_sum || '
MIN(date_trunc(''day'', (time_collected)::timestamp)::date) as time_collected
FROM
' || schema_name || '.' || tbl_name || '
WHERE time_collected>=date_trunc(''day'', (now() + interval ''-4 hour'')::date - interval ''59 day'')::date
AND time_collected<=date_trunc(''day'', (now() + interval ''-4 hour'')::date + interval ''1 day'')::date
GROUP BY
date_trunc(''day'', (time_collected)::timestamp)::date
ORDER BY
MIN(date_trunc(''day'', (time_collected)::timestamp)::date) DESC
) as x;';
-- Execute query and return the results
RETURN QUERY EXECUTE qa_query_full;
END
$func$ LANGUAGE PLPGSQL;
-- Get the results for walmart.keywords_raw
SELECT b, c FROM qa_fields('walmart', 'keywords_raw');
DROP FUNCTION qa_fields(character varying,character varying);
CREATE OR REPLACE FUNCTION qa_fields(schema_name VARCHAR, tbl_name VARCHAR)
-- Return the custom query as TEXT, will need to execute on the client side somehow
RETURNS TEXT AS
$func$
DECLARE field_name VARCHAR;
DECLARE qa_query_full TEXT = '';
DECLARE qa_query_percent VARCHAR = '';
DECLARE qa_query_sum VARCHAR = '';
BEGIN
FOR field_name IN SELECT column_name
FROM information_schema.columns
WHERE table_schema = schema_name
AND table_name = tbl_name
AND data_type != 'boolean'
-- Ignore these columns (these are not from the scraped data)
AND column_name not in ('id', 'scrape_id', 'keyword', 'keyword_id')
LOOP
-- Create the 2 selects for the dynamic fields
qa_query_percent := qa_query_percent || 'x.' || field_name || '_null/x.total::float*100 as ' || field_name || '_null_percent, ';
qa_query_sum := qa_query_sum || 'SUM(CASE WHEN ' || field_name || ' IS NULL THEN 1 ELSE 0 END) AS ' || field_name || '_null, ';
END LOOP;
-- Create the completed query
qa_query_full := 'SELECT ' || qa_query_percent || ' x.time_collected
FROM (SELECT
count(id) as total,
' || qa_query_sum || '
MIN(date_trunc(''day'', (time_collected)::timestamp)::date) as time_collected
FROM
' || schema_name || '.' || tbl_name || '
WHERE time_collected>=date_trunc(''day'', (now() + interval ''-4 hour'')::date - interval ''59 day'')::date
AND time_collected<=date_trunc(''day'', (now() + interval ''-4 hour'')::date + interval ''1 day'')::date
GROUP BY
date_trunc(''day'', (time_collected)::timestamp)::date
ORDER BY
MIN(date_trunc(''day'', (time_collected)::timestamp)::date) DESC
) as x;';
-- Return the query as type TEXT
RETURN qa_query_full;
END
$func$ LANGUAGE PLPGSQL;
-- Returns a field called `qa_fields` with the query string in it
select qa_fields('walmart', 'keywords_raw');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment