Skip to content

Instantly share code, notes, and snippets.

@marcocitus
Last active March 12, 2019 04:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marcocitus/1022701a1c9fc468c7b8fc8d7bf17f2c to your computer and use it in GitHub Desktop.
Save marcocitus/1022701a1c9fc468c7b8fc8d7bf17f2c to your computer and use it in GitHub Desktop.
Load data from GitHub in postgres
CREATE OR REPLACE FUNCTION public.load_github_data(events_date date, hour integer)
RETURNS void
LANGUAGE plpgsql
SECURITY DEFINER
AS $function$
BEGIN
CREATE TEMPORARY TABLE input (data jsonb) ON COMMIT DROP;
EXECUTE format($$COPY input FROM PROGRAM 'curl -s http://data.githubarchive.org/%s-%s.json.gz | zcat | grep -v \\u0000' CSV QUOTE e'\x01' DELIMITER e'\x02'$$, events_date, hour);
INSERT INTO github.events SELECT
(data->>'id')::bigint AS event_id,
(data->>'type')::text AS event_type,
(data->>'public')::boolean AS event_public,
(data->'repo'->>'id')::bigint AS repo_id,
(data->'payload') AS payload,
(data->'repo') AS repo,
(data->'actor') AS actor,
(data->'org') AS org,
(data->>'created_at')::timestamp AS created_at FROM input;
DROP TABLE input;
END;
$function$;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment