Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Example upload of Pandas DataFrame to Google BigQuery via temporary CSV file
from pathlib import Path
from tempfile import NamedTemporaryFile
import numpy as np
import pandas as pd
from google.cloud.bigquery import Client, SchemaField
def main():
    """Upload a small random DataFrame to BigQuery as a smoke-test example."""
    # Service-account credentials are expected in the user's home directory.
    creds_file = Path("~/.gbq_creds.json").expanduser()
    bq_client = Client.from_service_account_json(creds_file)

    # Four required float columns, matching the random 10x4 frame below.
    # NOTE(review): this targets the legacy (pre-0.28) client API, where
    # dataset().table() accepted a schema argument; newer releases take only
    # the table name — confirm against the installed google-cloud-bigquery.
    field_names = ('field1', 'field2', 'field3', 'field4')
    schema = [SchemaField(name, 'float64', 'REQUIRED') for name in field_names]

    table = bq_client.dataset('my_dataset').table('test1', schema)
    frame = pd.DataFrame(np.random.rand(10, 4))  # simple example dataframe
    df_to_bigquery(frame, table)
def df_to_bigquery(df, table, write_disposition='WRITE_EMPTY', blocking=True):
    """Upload a Pandas DataFrame to Google BigQuery via a temporary CSV file.

    Args:
        df (DataFrame): The Pandas DataFrame to be uploaded.
        table (google.cloud.bigquery.Table): BigQuery table object.
        write_disposition (str): One of 'WRITE_EMPTY', 'WRITE_TRUNCATE' or
            'WRITE_APPEND'; defaults to 'WRITE_EMPTY'.
        blocking (bool): Pass False to return without waiting for the upload
            job to complete.

    Returns:
        google.cloud.bigquery.Job: The file upload job object. With
        blocking=False this can be used to check for job completion.
    """
    # df.to_csv requires a text-mode handle while table.upload_from_file
    # requires a binary-mode one, so the temp file is written first and then
    # reopened for reading — hence the nested context managers.
    with NamedTemporaryFile(mode='w',
                            encoding='UTF-8',
                            prefix="df_to_bigquery_",
                            suffix=".csv") as csv_handle:
        df.to_csv(csv_handle, index=False, encoding='UTF-8')
        csv_handle.flush()  # ensure all rows hit disk before re-reading
        with open(csv_handle.name, mode='rb') as binary_handle:
            job = table.upload_from_file(
                binary_handle,
                encoding='UTF-8',
                source_format='CSV',
                skip_leading_rows=1,  # skip the CSV header row
                create_disposition='CREATE_IF_NEEDED',
                write_disposition=write_disposition)
    if blocking:
        job.result()
    return job
# Run the example upload only when executed as a script, not on import.
if __name__ == '__main__':
    main()
@pabloazurduy

This comment has been minimized.

Copy link

pabloazurduy commented Feb 20, 2018

Hi,
this example is no longer working with the new version of BigQuery. In the line client.dataset('my_dataset').table('test1', schema), the table function now accepts only one argument (the table name).

from google.cloud import bigquery
print(bigquery.__version__)
'0.30.0'

I'm working on a solution for the new BigQuery Python package version; let me see if I can fix that.

@idshoaib

This comment has been minimized.

Copy link

idshoaib commented Apr 19, 2018

image
It gives a permission error. Has anybody else faced this error? Please help me to resolve it.
One more thing: is this a batch process or streaming?
Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.