Last active
January 27, 2024 09:40
-
-
Save rfmcnally/b9e98716ac09768b2b7f7a596681da0e to your computer and use it in GitHub Desktop.
Example of fetching data from Google Sheets document and loading into Pandas DataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import json | |
from dataclassses import dataclass, asdict | |
import pandas as pd | |
from googleapiclient.discovery import build | |
from google.oauth2 import service_account | |
@dataclass | |
class GoogleServiceAccount: | |
auth_uri: str | |
auth_provider_x509_cert_url: str | |
client_email: str | |
client_id: str | |
client_x509_cert_url: str | |
private_key: str | |
private_key_id: str | |
project_id: str | |
token_uri: str | |
type: str | |
dict = asdict | |
def load_gsheets(account_info: GoogleServiceAccount, spreadsheet_id: str, range: str) -> pd.DataFrame: | |
""" Load Google Sheets data into Pandas DataFrame """ | |
# Authenticate Google Service Account and initiate Spreadsheet Service | |
credentials = service_account.Credentials.from_service_account_info( | |
info=account_info.dict(), | |
scopes=SCOPES | |
) | |
service = build("sheets", "v4", credentials=credentials) | |
sheet = service.spreadsheets() | |
# Fetch Spreadsheet + Range and set first row as column names | |
result = sheet.values().get( | |
spreadsheetId=spreadsheet_id, | |
range=range | |
).execute() | |
data_rows = result["values"][1:] | |
column_names = result["values"][0] | |
# Reduce length of column names if greater than number of columns in first row | |
if len(column_names) > len(data_rows[0]): | |
column_names = column_names[:len(data_rows[0])] | |
# Load into DataFrame, using first row as column names | |
df = pd.DataFrame(data_rows, columns=column_names) | |
return df | |
# Sample Usage, see GoogleServiceAccount dataclass for required fields | |
account_info = GoogleServiceAccount(**json.loads(os.environ['GOOGLE_SERVICE_ACCOUNT'])) | |
spreadsheet_id = "YOUR SPREADSHEET ID" # ID for the Google Spreadsheet, found in the URL | |
range = "YOUR SHEET/RANGE" # Can be sheet name or combination of sheet and range, e.g. Sheet1!A1:C100 | |
df = load_gsheets(account_info, spreadsheet_id, range) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment