@robfraz
Last active April 19, 2018 10:47
Example upload of Pandas DataFrame to Google BigQuery via temporary CSV file
from pathlib import Path
from tempfile import NamedTemporaryFile

import numpy as np
import pandas as pd
from google.cloud.bigquery import Client, SchemaField


def main():
    gbq_creds_path = Path("~/.gbq_creds.json").expanduser()
    client = Client.from_service_account_json(gbq_creds_path)
    schema = [
        SchemaField('field1', 'float64', 'REQUIRED'),
        SchemaField('field2', 'float64', 'REQUIRED'),
        SchemaField('field3', 'float64', 'REQUIRED'),
        SchemaField('field4', 'float64', 'REQUIRED')
    ]
    # NOTE: passing a schema to table() is the pre-0.28 google-cloud-bigquery
    # API; see the comments below for newer versions.
    table = client.dataset('my_dataset').table('test1', schema)
    df = pd.DataFrame(np.random.rand(10, 4))  # Create a simple random DataFrame
    df_to_bigquery(df, table)


def df_to_bigquery(df,
                   table,
                   write_disposition='WRITE_EMPTY',
                   blocking=True):
    """Upload a Pandas DataFrame to Google BigQuery.

    Args:
        df (DataFrame): The Pandas DataFrame to be uploaded.
        table (google.cloud.bigquery.Table): BigQuery table object.
        write_disposition (str): Either 'WRITE_EMPTY', 'WRITE_TRUNCATE', or
            'WRITE_APPEND'; the default is 'WRITE_EMPTY'.
        blocking (bool): Set to False if you don't want to block until the
            job is complete.

    Returns:
        google.cloud.bigquery.Job: The file upload job object. If you have
            set blocking=False, this can be used to check for job completion.
    """
    with NamedTemporaryFile(mode='w',
                            encoding='UTF-8',
                            prefix="df_to_bigquery_",
                            suffix=".csv") as writebuf:
        df.to_csv(writebuf, index=False, encoding='UTF-8')
        writebuf.flush()

        # Annoyingly, df.to_csv above requires a non-binary mode file handle,
        # whereas table.upload_from_file below requires a binary mode file
        # handle, so we end up with nested context managers.
        with open(writebuf.name, mode='rb') as readbuf:
            job = table.upload_from_file(readbuf,
                                         encoding='UTF-8',
                                         source_format='CSV',
                                         skip_leading_rows=1,
                                         create_disposition='CREATE_IF_NEEDED',
                                         write_disposition=write_disposition)

    if blocking:
        job.result()
    return job


if __name__ == '__main__':
    main()
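
For reference, here is a minimal sketch of the non-blocking usage the docstring describes, reusing the df and table objects from main() and assuming the LoadJob.done()/result() methods of the client library version this gist targets:

import time

job = df_to_bigquery(df, table, blocking=False)
while not job.done():  # done() refreshes the job state from the API
    time.sleep(5)      # poll every few seconds
job.result()           # raises if the load job ultimately failed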
@pabloazurduy

Hi,
this example is no longer working with the new version of bigquery. In the line client.dataset('my_dataset').table('test1', schema), the table function now accepts only one argument (the table name).

from google.cloud import bigquery
print(bigquery.__version__)
'0.30.0'

I'm working on a solution for the new bigquery Python package version; let me see if I can fix that.
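
For readers on the newer package, a minimal sketch of the equivalent load against google-cloud-bigquery 0.28+, where Table.upload_from_file was replaced by Client.load_table_from_file and the per-call options moved into a LoadJobConfig. Here gbq_creds_path and the dataset/table names come from the gist above, and csv_path is a placeholder for the CSV written by df.to_csv:

from google.cloud import bigquery

client = bigquery.Client.from_service_account_json(gbq_creds_path)
table_ref = client.dataset('my_dataset').table('test1')  # now a TableReference, no schema arg

job_config = bigquery.LoadJobConfig()
job_config.source_format = 'CSV'
job_config.skip_leading_rows = 1
job_config.write_disposition = 'WRITE_EMPTY'
job_config.schema = [
    bigquery.SchemaField('field1', 'FLOAT64', mode='REQUIRED'),
    bigquery.SchemaField('field2', 'FLOAT64', mode='REQUIRED'),
    bigquery.SchemaField('field3', 'FLOAT64', mode='REQUIRED'),
    bigquery.SchemaField('field4', 'FLOAT64', mode='REQUIRED'),
]

with open(csv_path, mode='rb') as readbuf:  # csv_path: placeholder path to the temp CSV
    job = client.load_table_from_file(readbuf, table_ref, job_config=job_config)
job.result()  # block until the load job completes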

@idevshoaib

idevshoaib commented Apr 19, 2018

[screenshot of the permission error]
It gives a permission error. Has anybody else faced this error? Please help me resolve it.
One more thing: is this a batch process or streaming?
Thanks!
