Last active
May 5, 2022 08:36
-
-
Save DiscordBlog/18d1b39cd1b13c5dbb61caee3eda7726 to your computer and use it in GitHub Desktop.
Version Two - User Ergonomics Code Block
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
columns: | |
- [emoji_id, INT64, NOT NULL, 'ID of an emoji'] | |
- [timestamp, TIMESTAMP, NOT NULL, 'First time a custom emoji was sent'] | |
- [timestamp_hour, TIMESTAMP, NOT NULL, 'Descriptions are added to database schemas so they appear in the BigQuery UI'] | |
description: 'The table description used in data discovery tooling and generated documentation' | |
category: 'Category for organizing generated documentation' | |
strategy: merge # Merge means to incrementally update or insert records into the table based on the merge_keys and merge_mode | |
merge_keys: [emoji_id] # The table will have at most one record per emoji, if there is no record for the emoji then insert it otherwise ignore new records | |
merge_mode: keep_oldest # Only record the first time an emoji was sent and ignore all future records | |
merge_timestamp: timestamp # Determine the first time an emoji was sent based on timestamp in the send_emoji event | |
schedule: daily # Incrementally update this table once per day | |
window: daily # Add one day of source data each run | |
partition_by: timestamp_hour # Partition the BigQuery table by timestamp_hour | |
cluster_by: [emoji_id] # Sort each partition by emoji_id | |
timezone: pt # Timestamps are pacific timezone and daily aggregates are pacific timezone days | |
dataset: emoji_cohorts # BigQuery dataset | |
sql: | | |
SELECT | |
emoji_id, | |
MIN(timestamp) AS timestamp, | |
TIMESTAMP_TRUNC(MIN(timestamp), HOUR) AS timestamp_hour | |
FROM events.send_emoji | |
WHERE timestamp >= @start_day_utc # Derived injects timestamps based on data availability of predecessor dependencies | |
AND timestamp < @end_day_utc # start_day_utc and end_day_utc are computed based on incremental, rebuild, and backfill operations | |
GROUP BY emoji_id |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment