Skip to content

Instantly share code, notes, and snippets.

@MattFaus
Last active August 29, 2015 14:02
Show Gist options
  • Save MattFaus/d16c6baab88a018fda97 to your computer and use it in GitHub Desktop.
Save MattFaus/d16c6baab88a018fda97 to your computer and use it in GitHub Desktop.
Some helper functions to build a SELECT statement for defining a view.
def get_table_schema(dataset, table):
"""If the table exists, returns its schema. Otherwise, returns None."""
table_service = BigQueryService.get_service().tables()
try:
get_result = table_service.get(
projectId=BQ_PROJECT_ID,
datasetId=dataset,
tableId=table
).execute()
return get_result['schema']
except apiclient_errors.HttpError as e:
# Return None if the table doesn't exist.
if json.loads(e.content)['error']['code'] == 404:
return None
else:
raise
def get_leaf_column_selectors(dataset, table):
"""Parses the table's schema to generate a list of column selectors.
BigQuery tables may have record fields, which have a hierarchical schema.
Each subfield is delimited with a dot, but BigQuery views cannot have dots
in their names. So, instead of defining the view like:
SELECT
__key__.namespace,
__key__.path
FROM
[MyTable]
You have to define it like:
SELECT
__key__.namespace as __key___namespace,
__key__.path as __key___path
FROM
[MyTable]
For more information, see http://stackoverflow.com/questions/23840038
"""
schema = get_table_schema(dataset, table)
if not schema:
raise BigQueryError('Expected table %s.%s to exist.' % (
dataset, table))
return ",\n".join([
_get_leaf_selectors("", top_field)
for top_field in schema["fields"]
])
def _get_leaf_selectors(prefix, field):
"""Recursive helper for get_leaf_column_selectors()"""
field_name = field["name"]
if prefix:
field_name = prefix + "." + field_name
if 'fields' not in field:
if "." in field_name:
# If we translate user.email to user_email, the user_email field
# may already exist as a top-level field, so prepend an underscore
# to signify this is a record-turned-regular field. There shouldn't
# be any top-level actual fields that start with an underscore.
safe_name = field_name.replace(".", "_")
return "%s as _%s" % (field_name, safe_name)
else:
return field_name
else:
# Recursive case
return ",\n".join([
_get_leaf_selectors(field_name, sub_field)
for sub_field in field["fields"]
])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment