Skip to content

Instantly share code, notes, and snippets.

@frsyuki
Last active December 21, 2015 23:58
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save frsyuki/6385494 to your computer and use it in GitHub Desktop.
Save frsyuki/6385494 to your computer and use it in GitHub Desktop.
-- landing page access ranking analysis using Treasure Data
SELECT first_path, COUNT(1) AS count
FROM (
SELECT TD_SESSIONIZE(time, 3600, host) AS session_id, TD_FIRST(path, time) AS first_path
FROM (
SELECT time, v['host'] as host, v['path'] as path
FROM www_access
DISTRIBUTE BY v['host']
SORT BY v['host'], v['time']
) ss
GROUP BY TD_SESSIONIZE(time, 3600, host)
HAVING path = "/signup"
) fs
GROUP BY first_path
ORDER BY count DESC
LIMIT 100
-- contribution raking for signup using Treasure Data
SELECT contributed_path, COUNT(1) AS count
FROM (
SELECT TD_SESSIONIZE(time, 3600, host) AS session_id, COLLECT_SET(path) AS contributed_paths
FROM (
SELECT time, v['host'] as host, v['path'] as path
FROM www_access
DISTRIBUTE BY v['host']
SORT BY v['host'], v['time']
) ss
GROUP BY TD_SESSIONIZE(time, 3600, host)
HAVING path = "/signup"
) fs
LATERAL VIEW EXPLODE(contributed_paths) AS contributed_path
WHERE contributed_path != "/signup"
GROUP BY contributed_path
ORDER BY count DESC
LIMIT 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment