Skip to content

Instantly share code, notes, and snippets.

@christippett
Created February 5, 2019 07:45
Show Gist options
  • Save christippett/d3c17870f6e9acff014775b489d23cec to your computer and use it in GitHub Desktop.
Save christippett/d3c17870f6e9acff014775b489d23cec to your computer and use it in GitHub Desktop.
Useful Python functions for Google Cloud Platform
import re
# Matches 'gs://<bucket>[/<object>]' and captures two named groups:
#   bucket_name - the text between 'gs://' and the first '/'
#   object_name - everything after that '/', or None for a bucket-only URL
#                 ('gs://bucket' or 'gs://bucket/')
# The bucket class excludes '/' so that a URL with an empty bucket segment
# (e.g. 'gs:///foo') is rejected rather than captured as a bucket name that
# contains slashes. '\n' is excluded as well so newline handling stays the
# same as with '.', which never matches a newline.
GCS_URL_PATTERN = re.compile(
    r'^(?:gs://)'
    r'(?P<bucket_name>[^/\n]+)'
    r'(?:/(?P<object_name>.+?)$|/?$)')
# Table reference of the form '[project_id.]dataset_id.table'.
# Named groups: project_id (None when the reference has only two parts),
# dataset_id and table. The pattern is unanchored and every group is a
# greedy '.+', so when more than two dots are present the leading parts all
# end up in project_id ('a.b.c.d' -> 'a.b', 'c', 'd').
BIGQUERY_TABLE_PATTERN = re.compile(
    r'(?:(?P<project_id>.+)\.)?(?P<dataset_id>.+)\.(?P<table>.+)'
)
def parse_gcs_bucket_url(bucket_url):
    """
    Split a Google Cloud Storage URL into its component parts.

    :param bucket_url: URL to split (e.g. 'gs://bucket/folder/file.txt')
    :return: tuple ``(bucket_name, path, filename)``. All three are None when
        the URL does not match GCS_URL_PATTERN; path and filename are None
        when the URL names only a bucket.
    """
    found = GCS_URL_PATTERN.search(bucket_url)
    if found is None:
        return None, None, None

    bucket, blob = found.groups()
    if not blob:
        # Bucket-only URL: there is no object part to split.
        return bucket, None, None

    # Everything before the last '/' is the path. An object sitting directly
    # under the bucket therefore gets an empty-string path, not None.
    folder, _, leaf = blob.rpartition('/')
    return bucket, folder, leaf
def parse_bigquery_table(bigquery_table):
    """
    Split a BigQuery table reference into its component parts.

    :param bigquery_table: table reference
        (e.g. 'project_id.dataset_id.table_id')
    :return: tuple ``(project_id, dataset_id, table_id)`` taken from the
        groups of BIGQUERY_TABLE_PATTERN, or ``(None, None, None)`` when the
        reference does not match.
    """
    found = BIGQUERY_TABLE_PATTERN.search(bigquery_table)
    if found is None:
        return None, None, None
    return found.groups()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment