Last active
September 19, 2017 12:13
-
-
Save rachelannelise/2b0d921fd16ae1e44f21bed6e165fa05 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--CONTENT_DIM: | |
--Grain: 1 row per apikey, post | |
--Surrogate Key: apikey + metadata_post_id | |
--Dimesions: metadata | |
--Calculated dimensions: word_count_buckets | |
CREATE TABLE public.content_dim ( | |
-- keys | |
content_key varchar(256), | |
-- idetifying fields | |
apikey VARCHAR(256) NOT NULL, | |
metadata_post_id VARCHAR(4096), | |
-- dimensions | |
metadata_authors VARCHAR(MAX), | |
metadata_canonical_url VARCHAR(4096), | |
metadata_custom_metadata VARCHAR(4096), | |
metadata_duration INTEGER, | |
metadata_full_content_word_count INTEGER, | |
metadata_image_url VARCHAR(4096), | |
metadata_page_type VARCHAR(256), | |
metadata_pub_date_tmsp BIGINT, | |
metadata_save_date_tmsp BIGINT, | |
metadata_section VARCHAR(256), | |
metadata_share_urls VARCHAR(MAX), | |
metadata_tags VARCHAR(MAX), | |
metadata_thumb_url VARCHAR(4096), | |
metadata_title VARCHAR(4096), | |
metadata_urls VARCHAR(MAX), | |
-- calculated dimensions | |
word_count_buckets VARCHAR(4096) | |
) distkey(content_key) sortkey(content_key) | |
CREATE TABLE public.content_dim | |
distkey(content_key) | |
sortkey(content_key) | |
as | |
select distinct | |
content_key, | |
--- ids | |
apikey, | |
metadata_post_id, | |
--- derived dimensions | |
case | |
when metadata_full_content_word_count >= 4000 then '4,000 or Above' | |
when metadata_full_content_word_count >= 3000 then '3,000 - 3,999' | |
when metadata_full_content_word_count >= 2000 then '2,000 - 2,999' | |
when metadata_full_content_word_count >= 1000 then '1,000 - 1,999' | |
when metadata_full_content_word_count >= 500 then '500 - 999' | |
when metadata_full_content_word_count >= 100 then '100 - 499' | |
else '< 100' end as word_count_buckets, | |
--- dimensions | |
metadata_authors, | |
metadata_full_content_word_count, | |
metadata_page_type, | |
metadata_pub_date_tmsp, | |
metadata_canonical_url, | |
metadata_section, | |
metadata_tags, | |
metadata_title | |
from public.tmp_dpl_staging | |
where metadata_post_id is not null; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment