Skip to content

Instantly share code, notes, and snippets.

@rfmcnally
Last active January 27, 2024 09:40
Show Gist options
  • Save rfmcnally/b9e98716ac09768b2b7f7a596681da0e to your computer and use it in GitHub Desktop.
Save rfmcnally/b9e98716ac09768b2b7f7a596681da0e to your computer and use it in GitHub Desktop.
Example of fetching data from Google Sheets document and loading into Pandas DataFrame
import os
import json
from dataclassses import dataclass, asdict
import pandas as pd
from googleapiclient.discovery import build
from google.oauth2 import service_account
@dataclass
class GoogleServiceAccount:
auth_uri: str
auth_provider_x509_cert_url: str
client_email: str
client_id: str
client_x509_cert_url: str
private_key: str
private_key_id: str
project_id: str
token_uri: str
type: str
dict = asdict
def load_gsheets(account_info: GoogleServiceAccount, spreadsheet_id: str, range: str) -> pd.DataFrame:
""" Load Google Sheets data into Pandas DataFrame """
# Authenticate Google Service Account and initiate Spreadsheet Service
credentials = service_account.Credentials.from_service_account_info(
info=account_info.dict(),
scopes=SCOPES
)
service = build("sheets", "v4", credentials=credentials)
sheet = service.spreadsheets()
# Fetch Spreadsheet + Range and set first row as column names
result = sheet.values().get(
spreadsheetId=spreadsheet_id,
range=range
).execute()
data_rows = result["values"][1:]
column_names = result["values"][0]
# Reduce length of column names if greater than number of columns in first row
if len(column_names) > len(data_rows[0]):
column_names = column_names[:len(data_rows[0])]
# Load into DataFrame, using first row as column names
df = pd.DataFrame(data_rows, columns=column_names)
return df
# Sample Usage, see GoogleServiceAccount dataclass for required fields
account_info = GoogleServiceAccount(**json.loads(os.environ['GOOGLE_SERVICE_ACCOUNT']))
spreadsheet_id = "YOUR SPREADSHEET ID" # ID for the Google Spreadsheet, found in the URL
range = "YOUR SHEET/RANGE" # Can be sheet name or combination of sheet and range, e.g. Sheet1!A1:C100
df = load_gsheets(account_info, spreadsheet_id, range)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment