Created
July 11, 2012 16:07
Discussion on SO about identifying paired entries in table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Author: SPFiredrake | |
Notes: | |
This is in regards to a discussion between myself, ErikE and Gordon Linoff. Erik suggested I create a little scriptlet that would create some test data for us to run some actual analysis on, so this baby was born. Table creation is included, so either uncomment the table code or just highlight and run that snippet in your tool of choice (SSMS). | |
This requires SQL Server 2005+, as it makes use of CTEs and CROSS/OUTER APPLYs. | |
Link: | |
http://stackoverflow.com/questions/11403594/sql-query-to-identify-paired-items-challenging | |
*/ | |
-- Create Table | |
/* | |
create table #t ( | |
dt datetime, | |
tapeID varchar(255), | |
BackupStatus varchar(255) | |
) | |
CREATE CLUSTERED INDEX t_cluster ON #t (tapeID, dt) WITH (FILLFACTOR=90) ON [PRIMARY] | |
CREATE INDEX t_tempbackup ON #t(BackupStatus) | |
*/ | |
CREATE TABLE #ttemp (dt datetime, tapeID varchar(255), BackupStatus varchar(255) ) | |
CREATE CLUSTERED INDEX ttemp_tempcluster ON #ttemp (tapeID, dt) | |
DECLARE @cntTable TABLE (N INT) | |
DECLARE @dtTable TABLE (DateEntry DATETIME) | |
DECLARE @tapeIDs TABLE (TapeID VARCHAR(5)) | |
; WITH t1 AS (SELECT 0 N UNION ALL SELECT 1 + N FROM t1 WHERE N < 300) | |
INSERT INTO @cntTable -- Tally Table | |
SELECT * FROM t1 OPTION(MAXRECURSION 1000) | |
INSERT INTO @dtTable ( DateEntry ) -- All date entries we'll consider. 2 months worth, 8AM-8PM every day. | |
SELECT DATEADD(mi, C.N*5, '2012-01-01 08:00:00') + c2.N FROM @cntTable C CROSS APPLY @cntTable C2 WHERE C2.N < 210 AND c.N < 145 | |
INSERT INTO @tapeIDs -- List of tapes. | |
SELECT 'ID' + CAST(N AS VARCHAR(5)) FROM @cntTable WHERE N BETWEEN 31 AND 40 | |
-- Insert all Start entries first. This is to make sure we don't have unmatched 'End' entries. | |
INSERT INTO #ttemp ( dt, tapeID, BackupStatus ) | |
SELECT | |
dt.DateEntry, t.TapeID, 'Start' BackupStatus | |
FROM | |
@dtTable dt | |
CROSS JOIN @tapeIDs t | |
CROSS APPLY (SELECT RAND(CHECKSUM(NEWID())) StartRate) g | |
WHERE | |
g.StartRate > 0.75 | |
-- Insert all End entries. We make sure to insert after the first Start entry for that day. | |
INSERT INTO #ttemp ( dt, tapeID, BackupStatus ) | |
SELECT | |
dt.DateEntry, t.TapeID, 'End' BackupStatus | |
FROM | |
@dtTable dt | |
CROSS JOIN @tapeIDs t | |
CROSS APPLY (SELECT CONVERT(VARCHAR(10), dt.DateEntry, 121) thisDT, RAND(CHECKSUM(NEWID())) EndRate) g | |
JOIN (SELECT tapeID, CONVERT(VARCHAR(10), dt, 121) dt, MIN(dt) mindt FROM #ttemp GROUP BY tapeID, CONVERT(VARCHAR(10), dt, 121)) | |
mdt ON t.TapeID = mdt.tapeID AND g.thisDT = mdt.dt AND dt.DateEntry > mdt.mindt | |
WHERE | |
g.EndRate > 0.90 | |
-- Correct any days that end with a Start entry to have a matching End entry. | |
INSERT INTO #ttemp ( dt, tapeID, BackupStatus ) | |
SELECT | |
DATEADD(mi, (CAST(RAND(Checksum(NewId())) * 20 AS int) + 1) * 5, t1.dt), t1.tapeID, 'End' | |
FROM | |
#ttemp t1 | |
JOIN (SELECT tapeID, CONVERT(VARCHAR(10), dt, 121) dt, MAX(dt) maxdt FROM #ttemp GROUP BY tapeID, CONVERT(VARCHAR(10), dt, 121)) | |
g ON t1.tapeID = g.tapeID AND maxdt = t1.dt | |
WHERE | |
t1.BackupStatus = 'Start' | |
-- Delete any Start entries matching End entries | |
DELETE t2 | |
FROM #ttemp t1 | |
JOIN #ttemp t2 ON t1.tapeID = t2.tapeID AND t1.dt = t2.dt | |
WHERE t1.BackupStatus = 'End' AND t2.backupstatus = 'Start' | |
-- Delete any consecutive End entries. | |
DELETE t2 | |
FROM #ttemp t1 | |
CROSS APPLY(SELECT TOP 1 BackupStatus, dt FROM #ttemp WHERE dt > t1.dt AND tapeID = t1.tapeID) t2 | |
WHERE t1.BackupStatus = 'End' AND t2.backupstatus = 'End'--) g ON g.dt = t1.dt AND g.tapeid = t1.tapeid | |
INSERT INTO #t ( dt, tapeID, BackupStatus ) | |
SELECT * FROM #ttemp ORDER BY dt | |
DROP INDEX ttemp_tempcluster ON #ttemp | |
DROP TABLE #ttemp | |
-- Cleanup code. | |
/* | |
TRUNCATE TABLE #t | |
DROP INDEX t_tempbackup on #t | |
DROP INDEX t_cluster on #t | |
DROP TABLE #t | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment