Created
April 15, 2015 10:07
-
-
Save willycs40/4e79f9a4d64dc4c35fd7 to your computer and use it in GitHub Desktop.
Break Timeseries into Sessions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--Test Data | |
CREATE TABLE #Log | |
( UserID int, [Timestamp] datetime) | |
INSERT INTO #Log | |
VALUES | |
(1, '2014-10-26 10:51:18'), (1, '2014-10-26 10:52:18'), (1, '2014-10-26 10:55:18'), (1, '2014-10-26 10:59:18'), (1, '2014-10-26 15:01:18'), (1, '2014-10-26 15:01:21'), (1, '2014-10-27 21:22:19'), | |
(2, '2014-10-15 12:19:01'), (2, '2014-10-15 12:19:12'), (2, '2014-10-15 12:19:45'), (2, '2014-10-15 12:20:03'), (2, '2014-10-17 14:55:13'), (2, '2014-10-17 14:55:19'), | |
(3, '2014-10-22 14:55:19') | |
--Use temp tables and indexes rather than cte's as it's going to be a lot of data. | |
--Get the max and min timestamp for each user | |
SELECT UserID, MIN([Timestamp]) AS [MinTimestamp], MAX([Timestamp]) AS [MaxTimestamp] | |
INTO #MinMax | |
FROM #Log | |
GROUP BY UserID | |
--Create a new copy of the data with the row_number field (or just add the row number in situ) | |
SELECT UserID, [Timestamp], row_number() OVER (PARTITION BY UserID ORDER BY [TimeStamp]) AS [ConsecutiveID] | |
INTO #Consec | |
FROM #Log | |
--Find all the gaps greater than 30 minutes | |
SELECT c1.UserID, c1.[Timestamp] AS [GapStart], c2.[Timestamp] AS [GapEnd] | |
INTO #SessionGaps | |
FROM #Consec c1 | |
INNER JOIN #Consec c2 | |
ON c1.UserID = c2.UserID | |
AND c1.ConsecutiveID = c2.ConsecutiveID - 1 | |
AND DATEDIFF(minute, c1.[Timestamp], c2.[Timestamp]) > 30 | |
--Union the gaps with the start and stop times for each user. | |
;WITH cte AS ( | |
SELECT UserID, GapStart AS [SessionEnd], GapEnd AS [SessionStart] | |
FROM #SessionGaps | |
UNION ALL | |
SELECT UserID, MaxTimestamp, NULL | |
FROM #MinMax | |
UNION ALL | |
SELECT UserID, NULL, MinTimestamp | |
FROM #MinMax | |
) | |
, cte2 AS ( --now add a row number to order the 'gaps' | |
SELECT *, row_number() OVER (PARTITION BY UserID ORDER BY SessionEnd) AS [SessionID] | |
FROM cte | |
) | |
SELECT c1.UserID, --finally, join consecutive rows, taking the start and end times from the different rows | |
c1.SessionStart, | |
c2.SessionEnd, | |
c1.[SessionID] | |
INTO #Sessions | |
FROM cte2 c1 | |
INNER JOIN cte2 c2 | |
ON c1.UserID = c2.UserID | |
AND c1.[SessionID] = c2.[SessionID] -1 | |
ORDER BY c1.UserID, c1.SessionStart | |
--Now do some stats on the sessions | |
SELECT s.UserID, s.SessionID, count(1) AS [TransactionCount] | |
INTO #SessionRowCounts | |
FROM #Log l | |
INNER JOIN #Sessions s | |
ON l.UserID = s.UserID | |
AND l.[Timestamp] BETWEEN s.SessionStart AND s.SessionEnd | |
GROUP BY s.UserID, s.[SessionID] | |
SELECT s.UserID | |
, s.SessionID | |
, SessionStart | |
, SessionEnd | |
, DATEDIFF(second, SessionStart, SessionEnd) AS [SessionLength] | |
, TransactionCount | |
FROM #Sessions s | |
INNER JOIN #SessionRowCounts src | |
ON s.UserID = src.UserID | |
AND s.SessionID = src.SessionID |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment