Created
June 16, 2010 20:09
-
-
Save mikelikespie/441192 to your computer and use it in GitHub Desktop.
This is an example how to use custom aggregates and windowing functions in postgres to stitch sessions using timeouts. sessionize_postgres_output.out is what happens when you run sessionize_postgres.sql.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
drop schema sessionize_test cascade; | |
create schema sessionize_test; | |
set search_path to sessionize_test; | |
create type user_session as ( | |
start_time timestamptz, | |
last_time timestamptz | |
); | |
CREATE OR REPLACE FUNCTION user_session_transfn(sess user_session, cur_time timestamptz, session_timeout interval) RETURNS user_session AS $$ | |
SELECT case | |
when $1 is null or $2 - $1.last_time > $3 then | |
($2, $2)::user_session -- if the session timed out, start a new id. | |
else | |
($1.start_time, $2)::user_session -- if it stayed the same, just leave it | |
end; | |
$$ LANGUAGE SQL IMMUTABLE; | |
CREATE OR REPLACE FUNCTION user_session_finalfn(sess user_session) RETURNS timestamptz AS $$ | |
SELECT $1.start_time; | |
$$ LANGUAGE SQL IMMUTABLE; | |
CREATE AGGREGATE sessionize (timestamptz, interval) | |
( | |
sfunc = user_session_transfn, | |
finalfunc = user_session_finalfn, | |
stype = user_session | |
); | |
create table user_events ( | |
user_id int, | |
event_time timestamptz | |
); | |
copy user_events from stdin; | |
1 2010-06-15 12:00:00 | |
1 2010-06-15 12:01:00 | |
1 2010-06-15 12:02:00 | |
1 2010-06-15 12:03:00 | |
1 2010-06-15 12:04:00 | |
1 2010-06-15 12:30:00 | |
1 2010-06-15 12:33:00 | |
1 2010-06-15 12:50:00 | |
1 2010-06-15 14:30:00 | |
2 2010-06-15 12:00:00 | |
2 2010-06-15 12:15:00 | |
2 2010-06-15 12:40:00 | |
2 2010-06-15 13:02:00 | |
2 2010-06-15 14:03:00 | |
2 2010-06-15 15:04:00 | |
2 2010-06-15 18:50:00 | |
3 2010-06-15 19:30:00 | |
3 2010-06-15 19:30:01 | |
3 2010-06-15 19:30:02 | |
3 2010-06-15 19:30:03 | |
3 2010-06-15 19:30:04 | |
5 2010-06-16 19:30:04 | |
5 2010-06-16 19:35:04 | |
5 2010-06-16 19:40:04 | |
5 2010-06-16 19:45:04 | |
5 2010-06-16 19:56:04 | |
\. | |
create table user_events_sessionized as | |
select | |
user_id, | |
event_time, | |
sessionize(event_time, '30 minutes') over (partition by user_id order by event_time) as session_start | |
from user_events; | |
select * from user_events_sessionized; | |
create table user_session_length as | |
select | |
user_id, | |
session_start, | |
max(event_time) - session_start as session_length, | |
count(*) as num_events | |
from user_events_sessionized | |
group by user_id, session_start | |
order by session_start; | |
select * from user_session_length; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create schema sessionize_test; | |
CREATE SCHEMA | |
set search_path to sessionize_test; | |
SET | |
create type user_session as ( | |
start_time timestamptz, | |
last_time timestamptz | |
); | |
CREATE TYPE | |
CREATE OR REPLACE FUNCTION user_session_transfn(sess user_session, cur_time timestamptz, session_timeout interval) RETURNS user_session AS $$ | |
SELECT case | |
when $1 is null or $2 - $1.last_time > $3 then | |
($2, $2)::user_session -- if the session timed out, start a new id. | |
else | |
($1.start_time, $2)::user_session -- if it stayed the same, just leave it | |
end; | |
$$ LANGUAGE SQL IMMUTABLE; | |
CREATE FUNCTION | |
CREATE OR REPLACE FUNCTION user_session_finalfn(sess user_session) RETURNS timestamptz AS $$ | |
SELECT $1.start_time; | |
$$ LANGUAGE SQL IMMUTABLE; | |
CREATE FUNCTION | |
CREATE AGGREGATE sessionize (timestamptz, interval) | |
( | |
sfunc = user_session_transfn, | |
finalfunc = user_session_finalfn, | |
stype = user_session | |
); | |
CREATE AGGREGATE | |
create table user_events ( | |
user_id int, | |
event_time timestamptz | |
); | |
CREATE TABLE | |
copy user_events from stdin; | |
create table user_events_sessionized as | |
select | |
user_id, | |
event_time, | |
sessionize(event_time, '30 minutes') over (partition by user_id order by event_time) as session_start | |
from user_events; | |
SELECT 26 | |
select * from user_events_sessionized; | |
user_id | event_time | session_start | |
---------+------------------------+------------------------ | |
1 | 2010-06-15 12:00:00-07 | 2010-06-15 12:00:00-07 | |
1 | 2010-06-15 12:01:00-07 | 2010-06-15 12:00:00-07 | |
1 | 2010-06-15 12:02:00-07 | 2010-06-15 12:00:00-07 | |
1 | 2010-06-15 12:03:00-07 | 2010-06-15 12:00:00-07 | |
1 | 2010-06-15 12:04:00-07 | 2010-06-15 12:00:00-07 | |
1 | 2010-06-15 12:30:00-07 | 2010-06-15 12:00:00-07 | |
1 | 2010-06-15 12:33:00-07 | 2010-06-15 12:00:00-07 | |
1 | 2010-06-15 12:50:00-07 | 2010-06-15 12:00:00-07 | |
1 | 2010-06-15 14:30:00-07 | 2010-06-15 14:30:00-07 | |
2 | 2010-06-15 12:00:00-07 | 2010-06-15 12:00:00-07 | |
2 | 2010-06-15 12:15:00-07 | 2010-06-15 12:00:00-07 | |
2 | 2010-06-15 12:40:00-07 | 2010-06-15 12:00:00-07 | |
2 | 2010-06-15 13:02:00-07 | 2010-06-15 12:00:00-07 | |
2 | 2010-06-15 14:03:00-07 | 2010-06-15 14:03:00-07 | |
2 | 2010-06-15 15:04:00-07 | 2010-06-15 15:04:00-07 | |
2 | 2010-06-15 18:50:00-07 | 2010-06-15 18:50:00-07 | |
3 | 2010-06-15 19:30:00-07 | 2010-06-15 19:30:00-07 | |
3 | 2010-06-15 19:30:01-07 | 2010-06-15 19:30:00-07 | |
3 | 2010-06-15 19:30:02-07 | 2010-06-15 19:30:00-07 | |
3 | 2010-06-15 19:30:03-07 | 2010-06-15 19:30:00-07 | |
3 | 2010-06-15 19:30:04-07 | 2010-06-15 19:30:00-07 | |
5 | 2010-06-16 19:30:04-07 | 2010-06-16 19:30:04-07 | |
5 | 2010-06-16 19:35:04-07 | 2010-06-16 19:30:04-07 | |
5 | 2010-06-16 19:40:04-07 | 2010-06-16 19:30:04-07 | |
5 | 2010-06-16 19:45:04-07 | 2010-06-16 19:30:04-07 | |
5 | 2010-06-16 19:56:04-07 | 2010-06-16 19:30:04-07 | |
(26 rows) | |
create table user_session_length as | |
select | |
user_id, | |
session_start, | |
max(event_time) - session_start as session_length, | |
count(*) as num_events | |
from user_events_sessionized | |
group by user_id, session_start | |
order by session_start; | |
SELECT 8 | |
select * from user_session_length; | |
user_id | session_start | session_length | num_events | |
---------+------------------------+----------------+------------ | |
1 | 2010-06-15 12:00:00-07 | 00:50:00 | 8 | |
2 | 2010-06-15 12:00:00-07 | 01:02:00 | 4 | |
2 | 2010-06-15 14:03:00-07 | 00:00:00 | 1 | |
1 | 2010-06-15 14:30:00-07 | 00:00:00 | 1 | |
2 | 2010-06-15 15:04:00-07 | 00:00:00 | 1 | |
2 | 2010-06-15 18:50:00-07 | 00:00:00 | 1 | |
3 | 2010-06-15 19:30:00-07 | 00:00:04 | 5 | |
5 | 2010-06-16 19:30:04-07 | 00:26:00 | 5 | |
(8 rows) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment