Last active
December 25, 2015 05:19
-
-
Save davidh-raybeam/6923417 to your computer and use it in GitHub Desktop.
avg_timespan.sql is a Vertica SQL statement which will, given a table like the one in setup.sql, calculate the mean amount of time a user spends on our website before completing a purchase, plus its standard deviation. Note that very little effort has been made to optimize this operation.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
AVG(session_length) as mean_session_length, | |
STDDEV(session_length_seconds) as session_length_stddev | |
FROM ( | |
SELECT | |
event_name, | |
MAX(action_timestamp) OVER(PARTITION BY user_hostname, pattern_id) | |
- MIN(action_timestamp) OVER(PARTITION BY user_hostname, pattern_id) | |
as session_length, | |
datediff('second', | |
MIN(action_timestamp) OVER(PARTITION BY user_hostname, pattern_id), | |
MAX(action_timestamp) OVER(PARTITION BY user_hostname, pattern_id))::float | |
as session_length_seconds | |
FROM ( | |
SELECT | |
user_hostname, | |
action_timestamp, | |
event_name(), | |
pattern_id() | |
FROM visitor_actions | |
MATCH ( | |
PARTITION BY user_hostname | |
ORDER BY action_timestamp | |
DEFINE | |
SessionStart as (referring_url IS NULL OR referring_url NOT LIKE '%://example.com/%') AND | |
action_type = 'page_view', | |
PageView as referring_url LIKE '%://example.com/%' AND | |
action_type = 'page_view', | |
PurchaseAction as action_type = 'purchase' | |
PATTERN P as (SessionStart PageView* PurchaseAction) | |
) | |
) as annotated_stream | |
) as session_lengths | |
WHERE event_name = 'PurchaseAction'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE visitor_actions ( | |
action_timestamp TIMESTAMP NOT NULL, | |
referring_url VARCHAR(512), | |
action_type VARCHAR(512) NOT NULL, | |
user_hostname VARCHAR(256) NOT NULL | |
) | |
ORDER BY action_timestamp | |
SEGMENTED BY hash(action_type, user_hostname) ALL NODES; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment