Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Load data from GitHub in postgres
CREATE OR REPLACE FUNCTION public.load_github_data(events_date date, hour integer)
RETURNS void
LANGUAGE plpgsql
SECURITY DEFINER
AS $function$
BEGIN
CREATE TEMPORARY TABLE input (data jsonb) ON COMMIT DROP;
EXECUTE format($$COPY input FROM PROGRAM 'curl -s http://data.githubarchive.org/%s-%s.json.gz | zcat | grep -v \\u0000' CSV QUOTE e'\x01' DELIMITER e'\x02'$$, events_date, hour);
INSERT INTO github.events SELECT
(data->>'id')::bigint AS event_id,
(data->>'type')::text AS event_type,
(data->>'public')::boolean AS event_public,
(data->'repo'->>'id')::bigint AS repo_id,
(data->'payload') AS payload,
(data->'repo') AS repo,
(data->'actor') AS actor,
(data->'org') AS org,
(data->>'created_at')::timestamp AS created_at FROM input;
DROP TABLE input;
END;
$function$;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.