Created
September 1, 2023 15:15
-
-
Save DrDanL/d20eb1ff7197cad181bc318b6eef0890 to your computer and use it in GitHub Desktop.
Query and download data from Google BigQuery tables using Python with google-cloud-bigquery and pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd

# Build credentials from a service-account key file.
# NOTE: keep this JSON key out of version control — it grants project access.
credentials = service_account.Credentials.from_service_account_file(
    'ServiceAccountKey.json'
)

# The Google Cloud project that owns the BigQuery dataset.
# TODO: fill in your project ID before running.
project_id = ''

# Create a BigQuery client bound to the credentials and project above.
client = bigquery.Client(credentials=credentials, project=project_id)

# Run the query. The project ID is interpolated so the table reference stays
# in sync with the client's project (previously a separate '<PROJECT>'
# placeholder had to be edited by hand, which could silently disagree with
# project_id). This example selects every row from the sharded GA4 events
# tables, but any SQL statement works here.
query_job = client.query(
    f"SELECT * FROM `{project_id}.analytics_289340186.events_*`"
)

# Wait for the job to finish and materialize the result as a pandas
# DataFrame. As of google-cloud-bigquery 1.26.0+ the BigQuery Storage API is
# used by default for faster downloads, so no extra arguments are needed.
df = query_job.result().to_dataframe()

# Show the first 10 rows. print() works in any Python environment; the
# original used display(), which only exists inside IPython/Jupyter and
# raises NameError when the script is run with plain `python`.
print(df.head(10))
# you can then modify the data as required
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment