Skip to content

Instantly share code, notes, and snippets.

@amontalenti
Last active May 19, 2016 13:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amontalenti/8b6e33f35353a9c5fd47625dcd94d38e to your computer and use it in GitHub Desktop.
Save amontalenti/8b6e33f35353a9c5fd47625dcd94d38e to your computer and use it in GitHub Desktop.
SELECT title, views FROM [test.parselyblog]
-- utilities
-- STRFTIME_UTC_USEC(pub_date, "%Y-%m")
-- SUM(views)a
-- COUNT(distinct url)
-- find dupes
-- ----------
SELECT title, COUNT(1) as num_urls
FROM [test.parselyblog]
GROUP BY title
ORDER BY num_urls DESC
LIMIT 100
-- calculate aggs
-- --------------
-- total views by author
SELECT author, SUM(views) as author_total_views
FROM [test.parselyblog]
GROUP BY author
-- total views by section
SELECT section, SUM(views) as section_total_views
FROM [test.parselyblog]
GROUP BY section
-- views by month over time
SELECT STRFTIME_UTC_USEC(pub_date, "%Y-%m") as month, SUM(views) as month_total_views
FROM [test.parselyblog]
GROUP BY month
ORDER BY month DESC
-- using subselects to do author, author # posts, and author views per post
SELECT author, author_posts, CAST(author_views / author_posts as INTEGER) as author_avg_views
FROM (
SELECT author, SUM(views) as author_views, COUNT(1) as author_posts
FROM [test.parselyblog]
GROUP BY author)
-- using subselects to do author, author # posts, author # visitors, and total "man-days" spent on content!
SELECT author, author_posts, author_visitors, CAST(author_time / 60 / 24 as INTEGER) as author_man_days
FROM (
SELECT author, SUM(visitors) as author_visitors, SUM(minutes) as author_time, COUNT(1) as author_posts
FROM [test.parselyblog]
GROUP BY author)
-- use subselects together with keyword match and filtering
SELECT has_keyword, total_views, total_posts, CAST(total_views / total_posts as INTEGER) as avg_views
FROM (
SELECT LOWER(title) CONTAINS "facebook" AS has_keyword, SUM(views) as total_views, COUNT(1) as total_posts
FROM [test.parselyblog]
WHERE author <> "Andrew Montalenti"
GROUP BY has_keyword
)
/* BigQuery schema:
url:STRING,
title:STRING,
pub_date:TIMESTAMP,
author:STRING,
section:STRING,
tags:STRING,
sort_metric:INTEGER,
visitors:INTEGER,
views:INTEGER,
minutes:INTEGER,
shares:INTEGER,
views_desktop:INTEGER,
views_mobile:INTEGER,
views_tablet:INTEGER,
refs_social:INTEGER,
refs_search:INTEGER,
refs_internal:INTEGER,
refs_other:INTEGER,
refs_direct:INTEGER,
visitors_new:INTEGER,
visitors_returning:INTEGER,
views_new:INTEGER,
views_returning:INTEGER,
minutes_new:INTEGER,
minutes_returning:INTEGER
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment