Created
February 5, 2019 07:45
-
-
Save christippett/d3c17870f6e9acff014775b489d23cec to your computer and use it in GitHub Desktop.
Useful Python functions for Google Cloud Platform
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Matches a Cloud Storage URL of the form ``gs://<bucket>[/<object>]``.
#   bucket_name -- everything between "gs://" and the first "/". Bucket names
#                  cannot contain "/", so the group excludes it; malformed
#                  input such as "gs:///object" is rejected rather than
#                  reported with "/" as the bucket.
#   object_name -- everything after the first "/" following the bucket, or
#                  None for "gs://bucket" and "gs://bucket/".
GCS_URL_PATTERN = re.compile(
    r'^(?:gs://)'
    r'(?P<bucket_name>[^/]+)'
    r'(?:/(?P<object_name>.+?)$|/?$)')
# Matches a BigQuery table reference: ``[<project><sep>]<dataset>.<table>``.
#   project_id -- optional; None when only "dataset.table" is given. The
#                 separator after the project may be "." (standard SQL form,
#                 "project.dataset.table") or ":" (legacy SQL / bq CLI form,
#                 "project:dataset.table").
#   dataset_id -- the segment immediately before the last "."
#   table      -- the segment after the last "."
# The project group is greedy, so a project containing "." or ":" (for example
# "example.com:project") is kept whole.
BIGQUERY_TABLE_PATTERN = re.compile(
    r'(?:(?P<project_id>.+)[.:])?'
    r'(?P<dataset_id>.+)\.'
    r'(?P<table>.+)'
)
def parse_gcs_bucket_url(bucket_url):
    """
    Split a Google Cloud Storage URL into bucket, folder path and file name.

    :param bucket_url: GCS URL (e.g. 'gs://bucket/folder/file.txt')
    :return: tuple (bucket_name, path, filename), e.g.
        ('bucket', 'folder', 'file.txt'). All three are None when the URL
        does not match GCS_URL_PATTERN; path and filename are None when the
        URL names only a bucket; path is '' when the object sits directly
        under the bucket.
    """
    found = GCS_URL_PATTERN.search(bucket_url)
    if not found:
        return None, None, None

    bucket_name = found.group('bucket_name')
    object_name = found.group('object_name')
    if not object_name:
        # Bucket-only URL: there is no object to split.
        return bucket_name, None, None

    # Everything before the last '/' is the folder path; the rest is the file.
    path, _, filename = object_name.rpartition('/')
    return bucket_name, path, filename
def parse_bigquery_table(bigquery_table):
    """
    Split a BigQuery table reference into its components.

    :param bigquery_table: table reference
        (e.g. 'project_id.dataset_id.table_id')
    :return: tuple (project_id, dataset_id, table_id) as captured by
        BIGQUERY_TABLE_PATTERN; project_id is None when the reference has no
        project part, and all three are None when the reference does not
        match the pattern.
    """
    found = BIGQUERY_TABLE_PATTERN.search(bigquery_table)
    return found.groups() if found else (None, None, None)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment