Skip to content

Instantly share code, notes, and snippets.

@GabrielSGoncalves
Created November 17, 2021 18:19
Show Gist options
  • Save GabrielSGoncalves/ff9155246c55ead6d33d1103d51bbad1 to your computer and use it in GitHub Desktop.
Save GabrielSGoncalves/ff9155246c55ead6d33d1103d51bbad1 to your computer and use it in GitHub Desktop.
Function for reading private files from a Google Cloud Storage bucket
from typing import Union, Dict
from io import BytesIO
from bson import json_util
import pandas as pd
import requests
from google.cloud import storage
from google.oauth2 import service_account
def read_file_from_gcloud_storage(
    file_format: str,
    file_name: str,
    gcp_bucket: str,
    gcp_project: str,
    gcp_credentials_file: str,
    **kwargs,
) -> Union[pd.DataFrame, Dict, str]:
    """Read a file from Google Cloud Storage into a Python object.

    Parameters
    ----------
    file_format : str
        One of 'csv', 'xlsx', 'parquet', 'json' or 'txt'.
    file_name : str
        Name (object path) of the target file inside the bucket.
    gcp_bucket : str
        Name of the bucket that holds the file.
    gcp_project : str
        Name of the GCP project used to build the storage client.
    gcp_credentials_file : str
        Path to the service-account JSON credentials file.
    **kwargs
        Forwarded to the reader selected by ``file_format``:
        ``pd.read_csv``, ``pd.read_excel``, ``pd.read_parquet``,
        ``json_util.loads`` or ``bytes.decode`` (for 'txt', ``encoding``
        defaults to "utf-8" and may be overridden).

    Returns
    -------
    Union[pd.DataFrame, Dict, str]
        A DataFrame for 'csv', 'xlsx' and 'parquet'; the decoded JSON
        document for 'json'; the decoded text for 'txt'.

    Raises
    ------
    ValueError
        If ``file_format`` is not one of the supported formats.
    FileNotFoundError
        If ``file_name`` does not exist in ``gcp_bucket``.
    """
    supported_formats = ("csv", "xlsx", "parquet", "json", "txt")
    # Reject unknown formats up front, before authenticating or downloading,
    # instead of silently returning None after the transfer.
    if file_format not in supported_formats:
        raise ValueError(
            f"Unsupported file_format {file_format!r}; "
            f"expected one of {', '.join(supported_formats)}."
        )

    # Authenticate using the service-account JSON credentials file
    credentials = service_account.Credentials.from_service_account_file(
        gcp_credentials_file
    )
    storage_client = storage.Client(
        project=gcp_project, credentials=credentials
    )

    # Locate the bucket and the file inside it
    bucket = storage_client.get_bucket(gcp_bucket)
    blob = bucket.get_blob(file_name)
    # get_blob returns None for a missing object; fail with a clear message
    # rather than an AttributeError on the download call.
    if blob is None:
        raise FileNotFoundError(
            f"File {file_name!r} not found in bucket {gcp_bucket!r}."
        )
    # download_as_string is a deprecated alias of download_as_bytes
    raw_bytes = blob.download_as_bytes()

    # Build the Python object that corresponds to the requested format
    if file_format == "csv":
        return pd.read_csv(BytesIO(raw_bytes), **kwargs)
    if file_format == "xlsx":
        return pd.read_excel(BytesIO(raw_bytes), **kwargs)
    if file_format == "parquet":
        return pd.read_parquet(BytesIO(raw_bytes), **kwargs)
    if file_format == "json":
        return json_util.loads(raw_bytes, **kwargs)
    # Only "txt" remains. Merge kwargs over the default so a caller-supplied
    # ``encoding`` overrides "utf-8" instead of colliding with it.
    decode_options = {"encoding": "utf-8", **kwargs}
    return raw_bytes.decode(**decode_options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment