Skip to content

Instantly share code, notes, and snippets.

@rachelannelise
Last active September 19, 2017 12:13
Show Gist options
  • Save rachelannelise/2b0d921fd16ae1e44f21bed6e165fa05 to your computer and use it in GitHub Desktop.
Save rachelannelise/2b0d921fd16ae1e44f21bed6e165fa05 to your computer and use it in GitHub Desktop.
--CONTENT_DIM:
--Grain: 1 row per apikey, post
--Surrogate Key: apikey + metadata_post_id
--Dimesions: metadata
--Calculated dimensions: word_count_buckets
CREATE TABLE public.content_dim (
-- keys
content_key varchar(256),
-- idetifying fields
apikey VARCHAR(256) NOT NULL,
metadata_post_id VARCHAR(4096),
-- dimensions
metadata_authors VARCHAR(MAX),
metadata_canonical_url VARCHAR(4096),
metadata_custom_metadata VARCHAR(4096),
metadata_duration INTEGER,
metadata_full_content_word_count INTEGER,
metadata_image_url VARCHAR(4096),
metadata_page_type VARCHAR(256),
metadata_pub_date_tmsp BIGINT,
metadata_save_date_tmsp BIGINT,
metadata_section VARCHAR(256),
metadata_share_urls VARCHAR(MAX),
metadata_tags VARCHAR(MAX),
metadata_thumb_url VARCHAR(4096),
metadata_title VARCHAR(4096),
metadata_urls VARCHAR(MAX),
-- calculated dimensions
word_count_buckets VARCHAR(4096)
) distkey(content_key) sortkey(content_key)
CREATE TABLE public.content_dim
distkey(content_key)
sortkey(content_key)
as
select distinct
content_key,
--- ids
apikey,
metadata_post_id,
--- derived dimensions
case
when metadata_full_content_word_count >= 4000 then '4,000 or Above'
when metadata_full_content_word_count >= 3000 then '3,000 - 3,999'
when metadata_full_content_word_count >= 2000 then '2,000 - 2,999'
when metadata_full_content_word_count >= 1000 then '1,000 - 1,999'
when metadata_full_content_word_count >= 500 then '500 - 999'
when metadata_full_content_word_count >= 100 then '100 - 499'
else '< 100' end as word_count_buckets,
--- dimensions
metadata_authors,
metadata_full_content_word_count,
metadata_page_type,
metadata_pub_date_tmsp,
metadata_canonical_url,
metadata_section,
metadata_tags,
metadata_title
from public.tmp_dpl_staging
where metadata_post_id is not null;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment