Skip to content

Instantly share code, notes, and snippets.

@borislitvak
Last active May 1, 2022 12:09
Show Gist options
  • Save borislitvak/f7ffc3e046233754d99e19c6af4d2657 to your computer and use it in GitHub Desktop.
Save borislitvak/f7ffc3e046233754d99e19c6af4d2657 to your computer and use it in GitHub Desktop.
BQ ELT
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/borislitvak/f7ffc3e046233754d99e19c6af4d2657/bq-elt.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "zU5b6dlRwUQk"
},
"outputs": [],
"source": [
"# Run the Colab authentication flow for the current user.\n",
"# NOTE(review): presumably this is what lets Google Cloud clients created in later\n",
"# cells pick up the user's credentials -- confirm against the google.colab docs.\n",
"from google.colab import auth\n",
"auth.authenticate_user()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "vBuUW-q41tb7"
},
"outputs": [],
"source": [
"from google.cloud import bigquery\n",
"# TODO: Replace the placeholder values below before running. Your dataset location must be US.\n",
"# BigQuery client bound to the given project ('your-project' is a placeholder).\n",
"client = bigquery.Client(project='your-project')\n",
"# ID of the table to work with (placeholder value).\n",
"table_id = \"dataset-in-us-location.your-table\"\n",
"# Cloud Storage URI of the source data file.\n",
"data_uri = \"gs://cloud-samples-data/bigquery/tutorials/github.json\""
]
},
{
"cell_type": "code",
"source": [
"# Load the newline-delimited JSON file at data_uri into table_id, then preview the table.\n",
"# Only the fields listed in `schema` are declared; ignore_unknown_values=True tells the\n",
"# load job to skip JSON values that do not match the schema, and ALLOW_FIELD_ADDITION\n",
"# permits the load to add new fields to the destination table's schema.\n",
"job_config = bigquery.LoadJobConfig(\n",
"    schema=[\n",
"        bigquery.SchemaField(\"commit\", \"STRING\", mode=\"REQUIRED\"),\n",
"        # TODO: Run this cell twice: once with the next line commented out,\n",
"        # then once more with it uncommented:\n",
"        # bigquery.SchemaField(\"repo_name\", \"STRING\"),\n",
"    ],\n",
"    source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,\n",
"    schema_update_options=[bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION],\n",
"    ignore_unknown_values=True,\n",
")\n",
"\n",
"load_job = client.load_table_from_uri(data_uri, table_id, job_config=job_config)\n",
"load_job.result()  # Waits for the job to complete.\n",
"\n",
"# Backtick-quote the table path so IDs containing dashes still parse as one identifier.\n",
"display(client.query(f\"SELECT * FROM `{table_id}` LIMIT 10\").result().to_dataframe())"
],
"metadata": {
"id": "K0USbt59p0as"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "BQ ELT",
"toc_visible": true,
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment