Skip to content

Instantly share code, notes, and snippets.

@magic-lantern
Last active September 29, 2017 20:50
Show Gist options
  • Save magic-lantern/904e22ca625404da489dab4f2706fdc7 to your computer and use it in GitHub Desktop.
Save magic-lantern/904e22ca625404da489dab4f2706fdc7 to your computer and use it in GitHub Desktop.
Google Cloud BigQuery Example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Google Cloud BigQuery Example\n",
"\n",
"The main goal of this example is to describe and demonstrate authentication methods for use with Google Cloud BigQuery as well as show some simple examples of how to retrieve data from BigQuery using the google.cloud.bigquery Python API.\n",
"\n",
"Some initial setup is required. Key points:\n",
"- Install required python libraries\n",
"- Determine desired authentication & authorization method\n",
"- Setup necessary items for authentication & authorization\n",
"- Write your query\n",
"\n",
"This example code will automatically attempt to perform authentication & authorization according to the following hirearchy:\n",
"1. Look in the current environment for credentials\n",
"1. Look for a credential file. See [https://cloud.google.com/docs/authentication/production#obtaining_and_providing_service_account_credentials_manually](https://cloud.google.com/docs/authentication/production#obtaining_and_providing_service_account_credentials_manually) for some guidance.\n",
"1. Perform OAuth2 authentication & authorization. In order to do this, a client identifier is required. See the following references:\n",
" - [https://cloud.google.com/storage/docs/json_api/v1/how-tos/authorizing](https://cloud.google.com/storage/docs/json_api/v1/how-tos/authorizing)\n",
" - [https://support.google.com/cloud/answer/6158857?hl=en](https://support.google.com/cloud/answer/6158857?hl=en)\n",
" - [https://support.google.com/cloud/answer/6158849](https://support.google.com/cloud/answer/6158849)\n",
" \n",
"For a brief overview of how OAuth2 works, see: [https://aaronparecki.com/oauth-2-simplified/](https://aaronparecki.com/oauth-2-simplified/)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Only need to run this cell the first time notebook used.\n",
"# In Datalab, uncomment these two lines\n",
"# !pip install --upgrade google_auth_oauthlib\n",
"# !pip install --upgrade google.cloud\n",
"# Outside datalab, run this via command line:\n",
"# pip install --upgrade google_auth_oauthlib\n",
"# pip install --upgrade google.cloud"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"from uuid import uuid4\n",
"import time\n",
"\n",
"from google.cloud import bigquery\n",
"from google.auth.exceptions import DefaultCredentialsError\n",
"from google_auth_oauthlib import flow"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# due to the fall through nature of this authentication & authorization code, it can take up to 20 seconds to run\n",
"\n",
"# setup variables for this cell\n",
"project = 'my-gcp-project'\n",
"launch_browser = True # set to False if running via commandline environment only\n",
"\n",
"try:\n",
" client = bigquery.Client(project = project)\n",
" print \"Loaded credentials from environment\"\n",
"except DefaultCredentialsError:\n",
" # that failed, so provide your own credentials file\n",
" try:\n",
" client = bigquery.Client.from_service_account_json(\n",
" os.path.join(os.path.expanduser('~'),\n",
" 'Downloads',\n",
" 'service_account_key.json'))\n",
" print \"Loaded credentials from provided file\"\n",
" except:\n",
" # further errors, just fall back to OAuth\n",
" appflow = flow.InstalledAppFlow.from_client_secrets_file(\n",
" # you must either have an existing client identifier, or create your own. \n",
" os.path.join(os.path.expanduser('~'),\n",
" 'Downloads',\n",
" 'oauth2_client_id.json'),\n",
" # see https://developers.google.com/identity/protocols/googlescopes\n",
" scopes=['https://www.googleapis.com/auth/bigquery'])\n",
"\n",
" if launch_browser:\n",
" appflow.run_local_server()\n",
" else:\n",
" appflow.run_console()\n",
" \n",
" client = bigquery.Client(project=project, credentials=appflow.credentials)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"query = 'SELECT concept_name FROM omop5deid.concept where invalid_reason is null order by concept_name limit 5'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Asynchronously run a query\n",
"def wait_for_job(job):\n",
" while True:\n",
" job.reload() # Refreshes the state via a GET request.\n",
" if job.state == 'DONE':\n",
" if job.error_result:\n",
" raise RuntimeError(job.errors)\n",
" return\n",
" time.sleep(2)\n",
" \n",
"query_job = client.run_async_query(str(uuid4()), query)\n",
"query_job.use_legacy_sql = False\n",
"query_job.begin()\n",
"\n",
"wait_for_job(query_job)\n",
"\n",
"# Drain the query results by requesting a page at a time.\n",
"query_results = query_job.results()\n",
"page_token = None\n",
"\n",
"while True:\n",
" rows, total_rows, page_token = query_results.fetch_data(\n",
" max_results=10,\n",
" page_token=page_token)\n",
" \n",
" for row in rows:\n",
" print row[0]\n",
"\n",
" if not page_token:\n",
" break "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Synchronously run a query - should only use this for small/fast running queries\n",
"results = client.run_sync_query(query)\n",
"results.use_legacy_sql = False\n",
"results.timeout_ms = 5000 # TIMEOUT IN MILLISECONDS\n",
"results.run()\n",
"\n",
"# depending on query, may need to use paging as shown in asynchronous query above\n",
"rows = results.rows\n",
"for row in rows:\n",
" print row[0]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment