Last active
May 1, 2022 12:09
-
-
Save borislitvak/f7ffc3e046233754d99e19c6af4d2657 to your computer and use it in GitHub Desktop.
BQ ELT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/borislitvak/f7ffc3e046233754d99e19c6af4d2657/bq-elt.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"id": "zU5b6dlRwUQk" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Authenticate the current Colab user for this runtime.\n", | |
"# NOTE(review): presumably the BigQuery client created in the next cell relies on\n", | |
"# these credentials -- confirm when running outside Colab, where this import fails.\n", | |
"from google.colab import auth\n", | |
"auth.authenticate_user()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"id": "vBuUW-q41tb7" | |
}, | |
"outputs": [], | |
"source": [ | |
"from google.cloud import bigquery\n", | |
"# TODO: Set the following parameters before running. Your dataset location must be US.\n", | |
"#   client   - BigQuery client bound to your project; replace 'your-project'.\n", | |
"#   table_id - destination table: the load job writes to it and the preview query reads from it.\n", | |
"#   data_uri - Cloud Storage URI of the source file handed to the load job.\n", | |
"client = bigquery.Client(project='your-project')\n", | |
"table_id = \"dataset-in-us-location.your-table\"\n", | |
"data_uri = \"gs://cloud-samples-data/bigquery/tutorials/github.json\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Load the sample JSON file into the destination table using an explicit schema.\n", | |
"# ignore_unknown_values drops JSON keys that are not listed in the schema, and\n", | |
"# ALLOW_FIELD_ADDITION lets a later run add new columns to the existing table.\n", | |
"job_config = bigquery.LoadJobConfig(\n", | |
"    schema=[\n", | |
"        bigquery.SchemaField(\"commit\", \"STRING\", mode=\"REQUIRED\"),\n", | |
"        # TODO: Run this cell twice: once as-is, then again with the next line\n", | |
"        # uncommented, and compare the columns in the preview below.\n", | |
"        # bigquery.SchemaField(\"repo_name\", \"STRING\"),\n", | |
"    ],\n", | |
"    source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,\n", | |
"    # Schema update options are honored for WRITE_APPEND (or WRITE_TRUNCATE on a\n", | |
"    # partition). WRITE_APPEND is already the load-job default; stating it makes\n", | |
"    # the dependency explicit and means each run appends the rows again.\n", | |
"    write_disposition=bigquery.WriteDisposition.WRITE_APPEND,\n", | |
"    schema_update_options=[bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION],\n", | |
"    ignore_unknown_values=True,\n", | |
")\n", | |
"\n", | |
"load_job = client.load_table_from_uri(data_uri, table_id, job_config=job_config)\n", | |
"load_job.result()  # Waits for the job to complete.\n", | |
"\n", | |
"# Backtick-quote the table path so IDs containing characters such as '-' still parse.\n", | |
"preview_sql = f\"SELECT * FROM `{table_id}` LIMIT 10\"\n", | |
"display(client.query(preview_sql).result().to_dataframe())" | |
], | |
"metadata": { | |
"id": "K0USbt59p0as" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"colab": { | |
"collapsed_sections": [], | |
"name": "BQ ELT", | |
"toc_visible": true, | |
"provenance": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"name": "python3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment