Skip to content

Instantly share code, notes, and snippets.

Created July 11, 2012 16:07
Discussion on SO about identifying paired entries in table
/*
Author: SPFiredrake
Notes:
This is in regards to a discussion between myself, ErikE and Gordon Linoff. Erik suggested I create a little scriptlet that would create some test data for us to run some actual analysis on, so this baby was born. Table creation is included, so either uncomment the table code or just highlight and run that snippet in your tool of choice (SSMS).
This requires SQL Server 2005+, as it makes use of CTEs and CROSS/OUTER APPLYs.
Link:
http://stackoverflow.com/questions/11403594/sql-query-to-identify-paired-items-challenging
*/
-- Create Table
/*
create table #t (
dt datetime,
tapeID varchar(255),
BackupStatus varchar(255)
)
CREATE CLUSTERED INDEX t_cluster ON #t (tapeID, dt) WITH (FILLFACTOR=90) ON [PRIMARY]
CREATE INDEX t_tempbackup ON #t(BackupStatus)
*/
CREATE TABLE #ttemp (dt datetime, tapeID varchar(255), BackupStatus varchar(255) )
CREATE CLUSTERED INDEX ttemp_tempcluster ON #ttemp (tapeID, dt)
DECLARE @cntTable TABLE (N INT)
DECLARE @dtTable TABLE (DateEntry DATETIME)
DECLARE @tapeIDs TABLE (TapeID VARCHAR(5))
; WITH t1 AS (SELECT 0 N UNION ALL SELECT 1 + N FROM t1 WHERE N < 300)
INSERT INTO @cntTable -- Tally Table
SELECT * FROM t1 OPTION(MAXRECURSION 1000)
INSERT INTO @dtTable ( DateEntry ) -- All date entries we'll consider. 2 months worth, 8AM-8PM every day.
SELECT DATEADD(mi, C.N*5, '2012-01-01 08:00:00') + c2.N FROM @cntTable C CROSS APPLY @cntTable C2 WHERE C2.N < 210 AND c.N < 145
INSERT INTO @tapeIDs -- List of tapes.
SELECT 'ID' + CAST(N AS VARCHAR(5)) FROM @cntTable WHERE N BETWEEN 31 AND 40
-- Insert all Start entries first. This is to make sure we don't have unmatched 'End' entries.
INSERT INTO #ttemp ( dt, tapeID, BackupStatus )
SELECT
dt.DateEntry, t.TapeID, 'Start' BackupStatus
FROM
@dtTable dt
CROSS JOIN @tapeIDs t
CROSS APPLY (SELECT RAND(CHECKSUM(NEWID())) StartRate) g
WHERE
g.StartRate > 0.75
-- Insert all End entries. We make sure to insert after the first Start entry for that day.
INSERT INTO #ttemp ( dt, tapeID, BackupStatus )
SELECT
dt.DateEntry, t.TapeID, 'End' BackupStatus
FROM
@dtTable dt
CROSS JOIN @tapeIDs t
CROSS APPLY (SELECT CONVERT(VARCHAR(10), dt.DateEntry, 121) thisDT, RAND(CHECKSUM(NEWID())) EndRate) g
JOIN (SELECT tapeID, CONVERT(VARCHAR(10), dt, 121) dt, MIN(dt) mindt FROM #ttemp GROUP BY tapeID, CONVERT(VARCHAR(10), dt, 121))
mdt ON t.TapeID = mdt.tapeID AND g.thisDT = mdt.dt AND dt.DateEntry > mdt.mindt
WHERE
g.EndRate > 0.90
-- Correct any days that end with a Start entry to have a matching End entry.
INSERT INTO #ttemp ( dt, tapeID, BackupStatus )
SELECT
DATEADD(mi, (CAST(RAND(Checksum(NewId())) * 20 AS int) + 1) * 5, t1.dt), t1.tapeID, 'End'
FROM
#ttemp t1
JOIN (SELECT tapeID, CONVERT(VARCHAR(10), dt, 121) dt, MAX(dt) maxdt FROM #ttemp GROUP BY tapeID, CONVERT(VARCHAR(10), dt, 121))
g ON t1.tapeID = g.tapeID AND maxdt = t1.dt
WHERE
t1.BackupStatus = 'Start'
-- Delete any Start entries matching End entries
DELETE t2
FROM #ttemp t1
JOIN #ttemp t2 ON t1.tapeID = t2.tapeID AND t1.dt = t2.dt
WHERE t1.BackupStatus = 'End' AND t2.backupstatus = 'Start'
-- Delete any consecutive End entries.
DELETE t2
FROM #ttemp t1
CROSS APPLY(SELECT TOP 1 BackupStatus, dt FROM #ttemp WHERE dt > t1.dt AND tapeID = t1.tapeID) t2
WHERE t1.BackupStatus = 'End' AND t2.backupstatus = 'End'--) g ON g.dt = t1.dt AND g.tapeid = t1.tapeid
INSERT INTO #t ( dt, tapeID, BackupStatus )
SELECT * FROM #ttemp ORDER BY dt
DROP INDEX ttemp_tempcluster ON #ttemp
DROP TABLE #ttemp
-- Cleanup code.
/*
TRUNCATE TABLE #t
DROP INDEX t_tempbackup on #t
DROP INDEX t_cluster on #t
DROP TABLE #t
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment