
@ContrastingSounds
Created January 15, 2018 09:20
Streams data to an existing Google BigQuery table.
import logging

from google.cloud import bigquery

SERVICE_ACCOUNT = '/path/to/credentials/file/<role>-<project>.json'
BQ_DATASET = 'state_data'

# Build an authenticated client from the service account key file.
client = bigquery.Client.from_service_account_json(SERVICE_ACCOUNT)
dataset = BQ_DATASET

logger = logging.getLogger()
def stream_data(client: bigquery.Client, dataset_name: str, table_name: str, data: list):
    """
    Given the necessary credentials and references, streams data to a BigQuery table.

    :param client: a google.cloud.bigquery.Client from the Google SDK
    :param dataset_name: name of the target dataset
    :param table_name: name of the target table
    :param data: list of dictionaries containing the data records
    :return: None
    """
    dataset_ref = client.dataset(dataset_name)
    table_ref = dataset_ref.table(table_name)

    # Get the table from the API so that the schema is available.
    table = client.get_table(table_ref)

    # BigQuery imposes a limit on the size of each streaming request, so it can be
    # useful during debugging to check how much you are sending. A complete design
    # should split large datasets into chunks before calling the streaming function.
    logger.debug(f'stream_data() for {len(data)} rows')

    # create_rows() is the streaming-insert call in google-cloud-bigquery 0.28;
    # later releases renamed it to insert_rows().
    errors = client.create_rows(table, data)

    if errors:
        logger.error(f'BigQuery Streaming Errors: {errors}')
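
Since BigQuery caps the size of each streaming request, callers should split large datasets into chunks before invoking stream_data(). A minimal usage sketch, assuming a table named 'us_states' already exists in the state_data dataset (the table name, chunk size, and sample records below are illustrative, not part of the original gist):

CHUNK_SIZE = 500  # rows per request; chosen here for illustration only

# Illustrative records; real data must match the existing table's schema.
records = [
    {'state': 'CA', 'population': 39500000},
    {'state': 'TX', 'population': 28300000},
]

for start in range(0, len(records), CHUNK_SIZE):
    chunk = records[start:start + CHUNK_SIZE]
    stream_data(client, BQ_DATASET, 'us_states', chunk)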