Last active
July 5, 2019 16:02
-
-
Save NJLangley/d069953a597a1482e68d1cfa5a3225d2 to your computer and use it in GitHub Desktop.
Associative Grouping using tSQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
IF db_id('demo') IS NULL | |
CREATE DATABASE demo | |
GO | |
USE demo | |
GO | |
-- Nodes | |
DROP TABLE IF EXISTS SupplierNodes; | |
CREATE TABLE SupplierNodes | |
( | |
Id INT IDENTITY(1,1) NOT NULL | |
,SupplierName VARCHAR(30) NOT NULL | |
,TaxNumber INT NOT NULL | |
,BankSortCode CHAR(8) NOT NULL | |
,BankAccountNumber INT NOT NULL | |
)AS NODE; | |
INSERT INTO SupplierNodes (SupplierName, TaxNumber, BankSortCode, BankAccountNumber) | |
VALUES ('AdventureWorks Ltd.', 12345678, '02-11-33', 12345678) | |
,('AdventureWorks', 12345678, '02-55-44', 15161718) | |
,('ADVENTURE WORKS', 23344556, '02-55-44', 15161718) | |
,('ADVENTURE WORKS LTD.', 23344556, '02-77-66', 99887766) | |
,('AW Bike Co', 23344556, '02-88-00', 11991199) | |
,('Big Bike Discounts (AW)', 55556666, '02-88-00', 11991199) | |
,('Contoso Corp', 90001000, '02-99-02', 12341234); | |
SELECT $node_id, SupplierName, TaxNumber, BankSortCode, BankAccountNumber | |
FROM SupplierNodes | |
-- Edges | |
DROP TABLE IF EXISTS SupplierEdges; | |
CREATE TABLE SupplierEdges | |
( | |
LinkType VARCHAR(100) NOT NULL | |
)AS EDGE; | |
WITH CTE_Edges | |
AS (-- Add the tax number links | |
SELECT s1.Id AS FromId | |
,s2.Id AS ToId | |
,'TaxNumber' AS LinkType | |
FROM SupplierNodes AS s1 | |
INNER JOIN SupplierNodes AS s2 | |
ON s1.TaxNumber = s2.TaxNumber | |
UNION | |
-- And the bank account links | |
SELECT s1.Id AS FromId | |
,s2.Id AS ToId | |
,'BankSortCode, BankAccountNumber' AS LinkType | |
FROM SupplierNodes AS s1 | |
INNER JOIN SupplierNodes AS s2 | |
ON s1.BankSortCode = s2.BankSortCode | |
AND s1.BankAccountNumber = s2.BankAccountNumber | |
) | |
INSERT INTO SupplierEdges ($from_id, $to_id, LinkType) | |
SELECT (SELECT $node_id FROM SupplierNodes WHERE Id = e.FromId) AS FromId | |
,(SELECT $node_id FROM SupplierNodes WHERE Id = e.ToId) AS ToId | |
,STRING_AGG(e.LinkType, ' / ') | |
FROM CTE_Edges AS e | |
GROUP BY e.FromId | |
,e.ToId; | |
WITH CTE_Links | |
AS (SELECT s1.Id | |
,CAST(s1.Id AS VARCHAR(10)) + ':' + STRING_AGG(s2.Id, ':') WITHIN GROUP (GRAPH PATH) AS GraphPath | |
,COUNT(s2.Id) WITHIN GROUP (GRAPH PATH) AS LevelCount | |
FROM SupplierNodes AS s1 | |
,SupplierEdges FOR PATH AS lnk | |
,SupplierNodes FOR PATH AS s2 | |
-- The SHORTEST_PATH() function is brand new in SQL Server 2019 CTP3.1 | |
WHERE MATCH (SHORTEST_PATH ( s1(-(lnk)->s2)+) ) | |
) | |
,CTE_Groups | |
AS (SELECT lnk.value AS Id | |
,MIN(lnks.Id) AS GroupId | |
FROM CTE_LINKS AS lnks | |
CROSS APPLY STRING_SPLIT(lnks.GraphPath, ':') AS lnk | |
GROUP BY lnk.value | |
) | |
SELECT s.* | |
,DENSE_RANK() OVER (ORDER BY g.GroupId) AS GraphId | |
FROM CTE_Groups AS g | |
INNER JOIN SupplierNodes AS s | |
ON s.Id = g.Id | |
ORDER BY s.Id; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
IF db_id('demo') IS NULL | |
CREATE DATABASE demo | |
GO | |
USE demo | |
GO | |
DROP TABLE IF EXISTS Suppliers; | |
CREATE TABLE Suppliers | |
( | |
Id INT IDENTITY(1,1) NOT NULL | |
,SupplierName VARCHAR(30) NOT NULL | |
,TaxNumber INT NOT NULL | |
,BankSortCode CHAR(8) NOT NULL | |
,BankAccountNumber INT NOT NULL | |
); | |
GO | |
INSERT INTO Suppliers (SupplierName, TaxNumber, BankSortCode, BankAccountNumber) | |
VALUES ('AdventureWorks Ltd.', 12345678, '02-11-33', 12345678) | |
,('AdventureWorks', 12345678, '02-55-44', 15161718) | |
,('ADVENTURE WORKS', 23344556, '02-55-44', 15161718) | |
,('ADVENTURE WORKS LTD.', 23344556, '02-77-66', 99887766) | |
,('AW Bike Co', 23344556, '02-88-00', 11991199) | |
,('Big Bike Discounts (AW)', 55556666, '02-88-00', 11991199) | |
,('Contoso Corp', 90001000, '02-99-02', 12341234); | |
WITH CTE_Groups | |
AS (SELECT s.Id | |
,s.SupplierName | |
,s.TaxNumber | |
,s.BankSortCode | |
,s.BankAccountNumber | |
,DENSE_RANK() OVER (ORDER BY s.TaxNumber) AS TaxNumberGroup | |
,10 + DENSE_RANK() OVER (ORDER BY s.BankSortCode, s.BankAccountNumber) AS BankAccountGroup | |
FROM Suppliers AS s | |
) | |
,CTE_Links | |
AS (SELECT g1.TaxNumberGroup | |
,g1.BankAccountGroup | |
,g2.TaxNumberGroup AS TaxNumberGroupLink | |
,( CASE WHEN g1.TaxNumberGroup > g2.TaxNumberGroup THEN 1 ELSE 0 END ) AS IsDupeEdge | |
,CAST( ':' + CAST(g1.TaxNumberGroup AS VARCHAR(10)) + ':' + CAST(g1.BankAccountGroup AS VARCHAR(10)) + ':' + CAST(g2.TaxNumberGroup AS VARCHAR(10)) + ':' AS VARCHAR(MAX) ) AS FullPath | |
,CAST( CAST(g1.BankAccountGroup AS VARCHAR(10)) + ':' + CAST(g2.TaxNumberGroup AS VARCHAR(10)) + ':' AS VARCHAR(MAX) ) AS SubPath | |
FROM CTE_Groups AS g1 | |
LEFT JOIN CTE_Groups AS g2 | |
ON g1.BankAccountGroup = g2.BankAccountGroup | |
AND g1.TaxNumberGroup <> g2.TaxNumberGroup | |
GROUP BY g1.TaxNumberGroup | |
,g1.BankAccountGroup | |
,g2.TaxNumberGroup | |
) | |
,RCTE_PathWalker | |
AS (SELECT l.TaxNumberGroup | |
,l.BankAccountGroup | |
,l.TaxNumberGroupLink | |
,l.TaxNumberGroup AS GraphGroup -- Reuse as unique per disconnected graph | |
,1 AS IterationCount | |
,l.FullPath | |
FROM CTE_Links as l | |
WHERE l.IsDupeEdge = 0 | |
UNION ALL | |
SELECT c.TaxNumberGroup | |
,c.BankAccountGroup | |
,c.TaxNumberGroupLink | |
,p.GraphGroup | |
,p.IterationCount + 1 | |
,p.FullPath + c.SubPath | |
FROM RCTE_PathWalker AS p | |
INNER JOIN CTE_Links AS c | |
ON c.TaxNumberGroup = p.TaxNumberGroupLink | |
-- Guard against circular paths through the graph | |
AND p.FullPath NOT LIKE '%' + c.FullPath + '%' | |
-- Another gaurd against circular paths through the graph | |
WHERE p.IterationCount < 100 | |
) | |
SELECT s.Id | |
,s.SupplierName | |
,s.TaxNumber | |
,s.BankSortCode | |
,s.BankAccountNumber | |
,g.GraphGroup | |
FROM CTE_Groups AS s | |
LEFT JOIN (SELECT pw.TaxNumberGroup | |
,DENSE_RANK() OVER (ORDER BY MIN(pw.GraphGroup)) AS GraphGroup | |
FROM RCTE_PathWalker AS pw | |
GROUP BY pw.TaxNumberGroup | |
)AS g | |
ON g.TaxNumberGroup = s.TaxNumberGroup | |
ORDER BY GraphGroup, Id; | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--INSERT INTO Supplier (SupplierName, TaxNumber, BankSortCode, BankAccountNumber) | |
INSERT INTO SupplierNodes (SupplierName, TaxNumber, BankSortCode, BankAccountNumber) | |
VALUES ('AdventureWorks Ltd.', 12345678, '02-11-33', 12345678) | |
,('AdventureWorks Limited', 12345678, '02-54-44', 32165487) | |
,('AdventureWorks', 12345678, '02-55-44', 15161718) | |
,('AW Limited', 98768765, '02-55-44', 15161718) | |
,('ADVENTURE WORKS', 98765432, '02-55-44', 15161718) | |
,('AW Bike Co', 98765432, '02-66-00', 11991199) | |
,('AW Bike Co', 12345678, '02-66-00', 11991199) | |
,('Big Bike Discounts (AW)', 55556666, '02-66-00', 11991199) | |
,('Contoso Corp', 90001000, '02-99-02', 12341234); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment