Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Associative Grouping using tSQL
IF db_id('demo') IS NULL
CREATE DATABASE demo
GO
USE demo
GO
-- Nodes
DROP TABLE IF EXISTS SupplierNodes;
CREATE TABLE SupplierNodes
(
Id INT IDENTITY(1,1) NOT NULL
,SupplierName VARCHAR(30) NOT NULL
,TaxNumber INT NOT NULL
,BankSortCode CHAR(8) NOT NULL
,BankAccountNumber INT NOT NULL
)AS NODE;
INSERT INTO SupplierNodes (SupplierName, TaxNumber, BankSortCode, BankAccountNumber)
VALUES ('AdventureWorks Ltd.', 12345678, '02-11-33', 12345678)
,('AdventureWorks', 12345678, '02-55-44', 15161718)
,('ADVENTURE WORKS', 23344556, '02-55-44', 15161718)
,('ADVENTURE WORKS LTD.', 23344556, '02-77-66', 99887766)
,('AW Bike Co', 23344556, '02-88-00', 11991199)
,('Big Bike Discounts (AW)', 55556666, '02-88-00', 11991199)
,('Contoso Corp', 90001000, '02-99-02', 12341234);
SELECT $node_id, SupplierName, TaxNumber, BankSortCode, BankAccountNumber
FROM SupplierNodes
-- Edges
DROP TABLE IF EXISTS SupplierEdges;
CREATE TABLE SupplierEdges
(
LinkType VARCHAR(100) NOT NULL
)AS EDGE;
WITH CTE_Edges
AS (-- Add the tax number links
SELECT s1.Id AS FromId
,s2.Id AS ToId
,'TaxNumber' AS LinkType
FROM SupplierNodes AS s1
INNER JOIN SupplierNodes AS s2
ON s1.TaxNumber = s2.TaxNumber
UNION
-- And the bank account links
SELECT s1.Id AS FromId
,s2.Id AS ToId
,'BankSortCode, BankAccountNumber' AS LinkType
FROM SupplierNodes AS s1
INNER JOIN SupplierNodes AS s2
ON s1.BankSortCode = s2.BankSortCode
AND s1.BankAccountNumber = s2.BankAccountNumber
)
INSERT INTO SupplierEdges ($from_id, $to_id, LinkType)
SELECT (SELECT $node_id FROM SupplierNodes WHERE Id = e.FromId) AS FromId
,(SELECT $node_id FROM SupplierNodes WHERE Id = e.ToId) AS ToId
,STRING_AGG(e.LinkType, ' / ')
FROM CTE_Edges AS e
GROUP BY e.FromId
,e.ToId;
WITH CTE_Links
AS (SELECT s1.Id
,CAST(s1.Id AS VARCHAR(10)) + ':' + STRING_AGG(s2.Id, ':') WITHIN GROUP (GRAPH PATH) AS GraphPath
,COUNT(s2.Id) WITHIN GROUP (GRAPH PATH) AS LevelCount
FROM SupplierNodes AS s1
,SupplierEdges FOR PATH AS lnk
,SupplierNodes FOR PATH AS s2
-- The SHORTEST_PATH() function is brand new in SQL Server 2019 CTP3.1
WHERE MATCH (SHORTEST_PATH ( s1(-(lnk)->s2)+) )
)
,CTE_Groups
AS (SELECT lnk.value AS Id
,MIN(lnks.Id) AS GroupId
FROM CTE_LINKS AS lnks
CROSS APPLY STRING_SPLIT(lnks.GraphPath, ':') AS lnk
GROUP BY lnk.value
)
SELECT s.*
,DENSE_RANK() OVER (ORDER BY g.GroupId) AS GraphId
FROM CTE_Groups AS g
INNER JOIN SupplierNodes AS s
ON s.Id = g.Id
ORDER BY s.Id;
IF db_id('demo') IS NULL
CREATE DATABASE demo
GO
USE demo
GO
DROP TABLE IF EXISTS Suppliers;
CREATE TABLE Suppliers
(
Id INT IDENTITY(1,1) NOT NULL
,SupplierName VARCHAR(30) NOT NULL
,TaxNumber INT NOT NULL
,BankSortCode CHAR(8) NOT NULL
,BankAccountNumber INT NOT NULL
);
GO
INSERT INTO Suppliers (SupplierName, TaxNumber, BankSortCode, BankAccountNumber)
VALUES ('AdventureWorks Ltd.', 12345678, '02-11-33', 12345678)
,('AdventureWorks', 12345678, '02-55-44', 15161718)
,('ADVENTURE WORKS', 23344556, '02-55-44', 15161718)
,('ADVENTURE WORKS LTD.', 23344556, '02-77-66', 99887766)
,('AW Bike Co', 23344556, '02-88-00', 11991199)
,('Big Bike Discounts (AW)', 55556666, '02-88-00', 11991199)
,('Contoso Corp', 90001000, '02-99-02', 12341234);
WITH CTE_Groups
AS (SELECT s.Id
,s.SupplierName
,s.TaxNumber
,s.BankSortCode
,s.BankAccountNumber
,DENSE_RANK() OVER (ORDER BY s.TaxNumber) AS TaxNumberGroup
,10 + DENSE_RANK() OVER (ORDER BY s.BankSortCode, s.BankAccountNumber) AS BankAccountGroup
FROM Suppliers AS s
)
,CTE_Links
AS (SELECT g1.TaxNumberGroup
,g1.BankAccountGroup
,g2.TaxNumberGroup AS TaxNumberGroupLink
,( CASE WHEN g1.TaxNumberGroup > g2.TaxNumberGroup THEN 1 ELSE 0 END ) AS IsDupeEdge
,CAST( ':' + CAST(g1.TaxNumberGroup AS VARCHAR(10)) + ':' + CAST(g1.BankAccountGroup AS VARCHAR(10)) + ':' + CAST(g2.TaxNumberGroup AS VARCHAR(10)) + ':' AS VARCHAR(MAX) ) AS FullPath
,CAST( CAST(g1.BankAccountGroup AS VARCHAR(10)) + ':' + CAST(g2.TaxNumberGroup AS VARCHAR(10)) + ':' AS VARCHAR(MAX) ) AS SubPath
FROM CTE_Groups AS g1
LEFT JOIN CTE_Groups AS g2
ON g1.BankAccountGroup = g2.BankAccountGroup
AND g1.TaxNumberGroup <> g2.TaxNumberGroup
GROUP BY g1.TaxNumberGroup
,g1.BankAccountGroup
,g2.TaxNumberGroup
)
,RCTE_PathWalker
AS (SELECT l.TaxNumberGroup
,l.BankAccountGroup
,l.TaxNumberGroupLink
,l.TaxNumberGroup AS GraphGroup -- Reuse as unique per disconnected graph
,1 AS IterationCount
,l.FullPath
FROM CTE_Links as l
WHERE l.IsDupeEdge = 0
UNION ALL
SELECT c.TaxNumberGroup
,c.BankAccountGroup
,c.TaxNumberGroupLink
,p.GraphGroup
,p.IterationCount + 1
,p.FullPath + c.SubPath
FROM RCTE_PathWalker AS p
INNER JOIN CTE_Links AS c
ON c.TaxNumberGroup = p.TaxNumberGroupLink
-- Guard against circular paths through the graph
AND p.FullPath NOT LIKE '%' + c.FullPath + '%'
-- Another gaurd against circular paths through the graph
WHERE p.IterationCount < 100
)
SELECT s.Id
,s.SupplierName
,s.TaxNumber
,s.BankSortCode
,s.BankAccountNumber
,g.GraphGroup
FROM CTE_Groups AS s
LEFT JOIN (SELECT pw.TaxNumberGroup
,DENSE_RANK() OVER (ORDER BY MIN(pw.GraphGroup)) AS GraphGroup
FROM RCTE_PathWalker AS pw
GROUP BY pw.TaxNumberGroup
)AS g
ON g.TaxNumberGroup = s.TaxNumberGroup
ORDER BY GraphGroup, Id;
--INSERT INTO Supplier (SupplierName, TaxNumber, BankSortCode, BankAccountNumber)
INSERT INTO SupplierNodes (SupplierName, TaxNumber, BankSortCode, BankAccountNumber)
VALUES ('AdventureWorks Ltd.', 12345678, '02-11-33', 12345678)
,('AdventureWorks Limited', 12345678, '02-54-44', 32165487)
,('AdventureWorks', 12345678, '02-55-44', 15161718)
,('AW Limited', 98768765, '02-55-44', 15161718)
,('ADVENTURE WORKS', 98765432, '02-55-44', 15161718)
,('AW Bike Co', 98765432, '02-66-00', 11991199)
,('AW Bike Co', 12345678, '02-66-00', 11991199)
,('Big Bike Discounts (AW)', 55556666, '02-66-00', 11991199)
,('Contoso Corp', 90001000, '02-99-02', 12341234);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.