Skip to content

Instantly share code, notes, and snippets.

@willycs40
Created April 15, 2015 10:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save willycs40/4e79f9a4d64dc4c35fd7 to your computer and use it in GitHub Desktop.
Save willycs40/4e79f9a4d64dc4c35fd7 to your computer and use it in GitHub Desktop.
Break Timeseries into Sessions
--Test Data
CREATE TABLE #Log
( UserID int, [Timestamp] datetime)
INSERT INTO #Log
VALUES
(1, '2014-10-26 10:51:18'), (1, '2014-10-26 10:52:18'), (1, '2014-10-26 10:55:18'), (1, '2014-10-26 10:59:18'), (1, '2014-10-26 15:01:18'), (1, '2014-10-26 15:01:21'), (1, '2014-10-27 21:22:19'),
(2, '2014-10-15 12:19:01'), (2, '2014-10-15 12:19:12'), (2, '2014-10-15 12:19:45'), (2, '2014-10-15 12:20:03'), (2, '2014-10-17 14:55:13'), (2, '2014-10-17 14:55:19'),
(3, '2014-10-22 14:55:19')
--Use temp tables and indexes rather than cte's as it's going to be a lot of data.
--Get the max and min timestamp for each user
SELECT UserID, MIN([Timestamp]) AS [MinTimestamp], MAX([Timestamp]) AS [MaxTimestamp]
INTO #MinMax
FROM #Log
GROUP BY UserID
--Create a new copy of the data with the row_number field (or just add the row number in situ)
SELECT UserID, [Timestamp], row_number() OVER (PARTITION BY UserID ORDER BY [TimeStamp]) AS [ConsecutiveID]
INTO #Consec
FROM #Log
--Find all the gaps greater than 30 minutes
SELECT c1.UserID, c1.[Timestamp] AS [GapStart], c2.[Timestamp] AS [GapEnd]
INTO #SessionGaps
FROM #Consec c1
INNER JOIN #Consec c2
ON c1.UserID = c2.UserID
AND c1.ConsecutiveID = c2.ConsecutiveID - 1
AND DATEDIFF(minute, c1.[Timestamp], c2.[Timestamp]) > 30
--Union the gaps with the start and stop times for each user.
;WITH cte AS (
SELECT UserID, GapStart AS [SessionEnd], GapEnd AS [SessionStart]
FROM #SessionGaps
UNION ALL
SELECT UserID, MaxTimestamp, NULL
FROM #MinMax
UNION ALL
SELECT UserID, NULL, MinTimestamp
FROM #MinMax
)
, cte2 AS ( --now add a row number to order the 'gaps'
SELECT *, row_number() OVER (PARTITION BY UserID ORDER BY SessionEnd) AS [SessionID]
FROM cte
)
SELECT c1.UserID, --finally, join consecutive rows, taking the start and end times from the different rows
c1.SessionStart,
c2.SessionEnd,
c1.[SessionID]
INTO #Sessions
FROM cte2 c1
INNER JOIN cte2 c2
ON c1.UserID = c2.UserID
AND c1.[SessionID] = c2.[SessionID] -1
ORDER BY c1.UserID, c1.SessionStart
--Now do some stats on the sessions
SELECT s.UserID, s.SessionID, count(1) AS [TransactionCount]
INTO #SessionRowCounts
FROM #Log l
INNER JOIN #Sessions s
ON l.UserID = s.UserID
AND l.[Timestamp] BETWEEN s.SessionStart AND s.SessionEnd
GROUP BY s.UserID, s.[SessionID]
SELECT s.UserID
, s.SessionID
, SessionStart
, SessionEnd
, DATEDIFF(second, SessionStart, SessionEnd) AS [SessionLength]
, TransactionCount
FROM #Sessions s
INNER JOIN #SessionRowCounts src
ON s.UserID = src.UserID
AND s.SessionID = src.SessionID
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment