Skip to content

Instantly share code, notes, and snippets.

@mikelikespie
Created June 16, 2010 20:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikelikespie/441192 to your computer and use it in GitHub Desktop.
Save mikelikespie/441192 to your computer and use it in GitHub Desktop.
This is an example how to use custom aggregates and windowing functions in postgres to stitch sessions using timeouts. sessionize_postgres_output.out is what happens when you run sessionize_postgres.sql.
drop schema sessionize_test cascade;
create schema sessionize_test;
set search_path to sessionize_test;
create type user_session as (
start_time timestamptz,
last_time timestamptz
);
CREATE OR REPLACE FUNCTION user_session_transfn(sess user_session, cur_time timestamptz, session_timeout interval) RETURNS user_session AS $$
SELECT case
when $1 is null or $2 - $1.last_time > $3 then
($2, $2)::user_session -- if the session timed out, start a new id.
else
($1.start_time, $2)::user_session -- if it stayed the same, just leave it
end;
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION user_session_finalfn(sess user_session) RETURNS timestamptz AS $$
SELECT $1.start_time;
$$ LANGUAGE SQL IMMUTABLE;
CREATE AGGREGATE sessionize (timestamptz, interval)
(
sfunc = user_session_transfn,
finalfunc = user_session_finalfn,
stype = user_session
);
create table user_events (
user_id int,
event_time timestamptz
);
copy user_events from stdin;
1 2010-06-15 12:00:00
1 2010-06-15 12:01:00
1 2010-06-15 12:02:00
1 2010-06-15 12:03:00
1 2010-06-15 12:04:00
1 2010-06-15 12:30:00
1 2010-06-15 12:33:00
1 2010-06-15 12:50:00
1 2010-06-15 14:30:00
2 2010-06-15 12:00:00
2 2010-06-15 12:15:00
2 2010-06-15 12:40:00
2 2010-06-15 13:02:00
2 2010-06-15 14:03:00
2 2010-06-15 15:04:00
2 2010-06-15 18:50:00
3 2010-06-15 19:30:00
3 2010-06-15 19:30:01
3 2010-06-15 19:30:02
3 2010-06-15 19:30:03
3 2010-06-15 19:30:04
5 2010-06-16 19:30:04
5 2010-06-16 19:35:04
5 2010-06-16 19:40:04
5 2010-06-16 19:45:04
5 2010-06-16 19:56:04
\.
create table user_events_sessionized as
select
user_id,
event_time,
sessionize(event_time, '30 minutes') over (partition by user_id order by event_time) as session_start
from user_events;
select * from user_events_sessionized;
create table user_session_length as
select
user_id,
session_start,
max(event_time) - session_start as session_length,
count(*) as num_events
from user_events_sessionized
group by user_id, session_start
order by session_start;
select * from user_session_length;
create schema sessionize_test;
CREATE SCHEMA
set search_path to sessionize_test;
SET
create type user_session as (
start_time timestamptz,
last_time timestamptz
);
CREATE TYPE
CREATE OR REPLACE FUNCTION user_session_transfn(sess user_session, cur_time timestamptz, session_timeout interval) RETURNS user_session AS $$
SELECT case
when $1 is null or $2 - $1.last_time > $3 then
($2, $2)::user_session -- if the session timed out, start a new id.
else
($1.start_time, $2)::user_session -- if it stayed the same, just leave it
end;
$$ LANGUAGE SQL IMMUTABLE;
CREATE FUNCTION
CREATE OR REPLACE FUNCTION user_session_finalfn(sess user_session) RETURNS timestamptz AS $$
SELECT $1.start_time;
$$ LANGUAGE SQL IMMUTABLE;
CREATE FUNCTION
CREATE AGGREGATE sessionize (timestamptz, interval)
(
sfunc = user_session_transfn,
finalfunc = user_session_finalfn,
stype = user_session
);
CREATE AGGREGATE
create table user_events (
user_id int,
event_time timestamptz
);
CREATE TABLE
copy user_events from stdin;
create table user_events_sessionized as
select
user_id,
event_time,
sessionize(event_time, '30 minutes') over (partition by user_id order by event_time) as session_start
from user_events;
SELECT 26
select * from user_events_sessionized;
user_id | event_time | session_start
---------+------------------------+------------------------
1 | 2010-06-15 12:00:00-07 | 2010-06-15 12:00:00-07
1 | 2010-06-15 12:01:00-07 | 2010-06-15 12:00:00-07
1 | 2010-06-15 12:02:00-07 | 2010-06-15 12:00:00-07
1 | 2010-06-15 12:03:00-07 | 2010-06-15 12:00:00-07
1 | 2010-06-15 12:04:00-07 | 2010-06-15 12:00:00-07
1 | 2010-06-15 12:30:00-07 | 2010-06-15 12:00:00-07
1 | 2010-06-15 12:33:00-07 | 2010-06-15 12:00:00-07
1 | 2010-06-15 12:50:00-07 | 2010-06-15 12:00:00-07
1 | 2010-06-15 14:30:00-07 | 2010-06-15 14:30:00-07
2 | 2010-06-15 12:00:00-07 | 2010-06-15 12:00:00-07
2 | 2010-06-15 12:15:00-07 | 2010-06-15 12:00:00-07
2 | 2010-06-15 12:40:00-07 | 2010-06-15 12:00:00-07
2 | 2010-06-15 13:02:00-07 | 2010-06-15 12:00:00-07
2 | 2010-06-15 14:03:00-07 | 2010-06-15 14:03:00-07
2 | 2010-06-15 15:04:00-07 | 2010-06-15 15:04:00-07
2 | 2010-06-15 18:50:00-07 | 2010-06-15 18:50:00-07
3 | 2010-06-15 19:30:00-07 | 2010-06-15 19:30:00-07
3 | 2010-06-15 19:30:01-07 | 2010-06-15 19:30:00-07
3 | 2010-06-15 19:30:02-07 | 2010-06-15 19:30:00-07
3 | 2010-06-15 19:30:03-07 | 2010-06-15 19:30:00-07
3 | 2010-06-15 19:30:04-07 | 2010-06-15 19:30:00-07
5 | 2010-06-16 19:30:04-07 | 2010-06-16 19:30:04-07
5 | 2010-06-16 19:35:04-07 | 2010-06-16 19:30:04-07
5 | 2010-06-16 19:40:04-07 | 2010-06-16 19:30:04-07
5 | 2010-06-16 19:45:04-07 | 2010-06-16 19:30:04-07
5 | 2010-06-16 19:56:04-07 | 2010-06-16 19:30:04-07
(26 rows)
create table user_session_length as
select
user_id,
session_start,
max(event_time) - session_start as session_length,
count(*) as num_events
from user_events_sessionized
group by user_id, session_start
order by session_start;
SELECT 8
select * from user_session_length;
user_id | session_start | session_length | num_events
---------+------------------------+----------------+------------
1 | 2010-06-15 12:00:00-07 | 00:50:00 | 8
2 | 2010-06-15 12:00:00-07 | 01:02:00 | 4
2 | 2010-06-15 14:03:00-07 | 00:00:00 | 1
1 | 2010-06-15 14:30:00-07 | 00:00:00 | 1
2 | 2010-06-15 15:04:00-07 | 00:00:00 | 1
2 | 2010-06-15 18:50:00-07 | 00:00:00 | 1
3 | 2010-06-15 19:30:00-07 | 00:00:04 | 5
5 | 2010-06-16 19:30:04-07 | 00:26:00 | 5
(8 rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment