Skip to content

Instantly share code, notes, and snippets.

@Danny-Driscoll
Created August 7, 2017 23:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Danny-Driscoll/519ab282a843387e33109e3a87153c6a to your computer and use it in GitHub Desktop.
Save Danny-Driscoll/519ab282a843387e33109e3a87153c6a to your computer and use it in GitHub Desktop.
Query introducing session definition based on session_id value
SELECT
experiment_id,
variation_id,
COUNT(DISTINCT end_user_id) AS unique_users,
COUNT(DISTINCT CASE WHEN is_converted = 1
THEN end_user_id END) AS unique_users_converted
FROM (
SELECT
sessions.end_user_id,
sessions.experiment_id,
sessions.variation_id,
sessions.session_id,
sessions.session_start_timestamp,
hits.hit_timestamp,
hits.event_name,
hits.event_type,
CASE WHEN hits.event_name = 'Event ID' AND hits.event_type = 'view_activated'
THEN 1 END AS is_converted
FROM (
SELECT
-- calculate session start as first timestamp in session for given experiment/variation
end_user_id,
experiment_id,
variation_id,
session_id,
MIN(hit_timestamp) AS session_start_timestamp
FROM
(
SELECT
-- convert to timestamp and exclude user IPs
end_user_id,
session_id,
experiment_id,
variation_id,
TIMESTAMP 'epoch' + hit_timestamp :: BIGINT * INTERVAL '1 Second ' AS hit_timestamp
FROM
optimizely.hits
WHERE user_ip !~
'^IP REGEX'
AND user_ip !~ '^IP REGEX $'
AND user_ip != 'IP REGEX'
) AS h
WHERE
experiment_id = 1234567909
GROUP BY
end_user_id,
session_id,
experiment_id,
variation_id
ORDER BY session_start_timestamp
) AS sessions
JOIN
(
SELECT
hits.experiment_id,
hits.variation_id,
hits.end_user_id,
hits.session_id,
hits.event_name,
hits.event_type,
TIMESTAMP 'epoch' + hit_timestamp :: BIGINT * INTERVAL '1 Second ' AS hit_timestamp
FROM optimizely.hits
WHERE experiment_id = 123456789
) AS hits
ON sessions.end_user_id = hits.end_user_id
AND sessions.session_id = hits.session_id
ORDER BY session_start_timestamp, hit_timestamp
) AS a
WHERE
session_start_timestamp >= '2017-04-14 00:00:00'
AND session_start_timestamp < '2017-04-25 00:00:00'
GROUP BY
experiment_id,
variation_id;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment