Skip to content

Instantly share code, notes, and snippets.

@wBobuk
Created April 10, 2018 21:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wBobuk/d4a1a71f226d28179cd5318a6f411faa to your computer and use it in GitHub Desktop.
Save wBobuk/d4a1a71f226d28179cd5318a6f411faa to your computer and use it in GitHub Desktop.
-- David Mauri
-- http://sqlblog.com/blogs/davide_mauri/archive/2017/11/12/lateral-thinking-transitive-closure-clustering-with-sql-server-uda-and-json.aspx
USE tempdb
GO
-- Graph version; nodes and edges
DROP TABLE IF EXISTS dbo.rawData
-- NODES
DROP TABLE IF EXISTS dbo.person
-- EDGES
DROP TABLE IF EXISTS dbo.isFriendOf
GO
CREATE TABLE dbo.rawData (
personId INT NOT NULL,
person CHAR(1),
isFriendOfId INT NOT NULL,
isFriendOf CHAR(1),
PRIMARY KEY ( person, isFriendOf ),
UNIQUE ( personId, isFriendOf )
)
CREATE TABLE dbo.person (
personId INT PRIMARY KEY,
person CHAR(1) UNIQUE NOT NULL
) AS NODE
CREATE TABLE dbo.isFriendOf AS EDGE;
GO
INSERT INTO dbo.rawData ( personId, person, isFriendOfId, isFriendOf )
VALUES
( 1, 'A', 2, 'B' ),
( 3, 'C', 1, 'A' ),
( 1, 'A', 4, 'D' ),
( 2, 'B', 5, 'E' ),
( 2, 'B', 6, 'F' ),
( 7, 'G', 8, 'H' ),
( 7, 'G', 9, 'J' ),
( 9, 'J', 10, 'L' ),
( 10, 'L', 11, 'M' )
GO
-- Populate node table
INSERT INTO dbo.person ( personId, person )
SELECT personId, person
FROM dbo.rawData
UNION
SELECT isFriendOfId, isFriendOf
FROM dbo.rawData
-- Populate edge table
INSERT INTO dbo.isFriendOf ( $from_id, $to_id )
SELECT f.$node_id, t.$node_id
FROM dbo.rawData rd
INNER JOIN dbo.person f ON rd.personId = f.personId
INNER JOIN dbo.person t ON rd.isFriendOfId = t.personId
GO
-- Run match queries
SELECT person1.personId, person2.personId isFriendOf
FROM dbo.person person1, dbo.isFriendOf isFriendOf, dbo.person person2
WHERE MATCH ( person1-(isFriendOf)->person2 );
GO
DROP FUNCTION IF EXISTS dbo.utf_tc;
GO
-- Looping function
DROP FUNCTION IF EXISTS dbo.utf_tc
GO
CREATE FUNCTION dbo.utf_tc ( @personId INT )
RETURNS @var TABLE
(
personId INT NOT NULL PRIMARY KEY NONCLUSTERED,
xlevel INT NOT NULL,
UNIQUE CLUSTERED( xlevel, personId )
)
AS
BEGIN
DECLARE @xlevel INT = 0
-- Get first node
INSERT INTO @var( personId, xlevel )
SELECT personId, @xlevel
FROM dbo.person
WHERE personId = @personId
-- Loop thru children
WHILE @@rowcount > 0
BEGIN
SET @xlevel += 1
-- Get children
INSERT INTO @var( personId, xlevel )
SELECT DISTINCT person2.personId, @xlevel
FROM @var AS tc, dbo.person person1, dbo.isFriendOf isFriendOf, dbo.person person2
WHERE tc.personId = person1.personId
AND MATCH ( person1-(isFriendOf)->person2 )
AND tc.xlevel = @xlevel - 1
AND NOT EXISTS ( -- id does not already exist
SELECT *
FROM @var
WHERE personId = person2.personId
)
END
RETURN
END
GO
/*
SELECT *
FROM dbo.utf_tc(3);
SELECT *
FROM dbo.utf_tc(7);
*/
DROP TABLE IF EXISTS #tmp
SELECT p.personId parentId, tc.*, COUNT(*) OVER( PARTITION BY p.personId ) steps
INTO #tmp
FROM dbo.person p
CROSS APPLY dbo.utf_tc( p.personId ) AS tc;
;WITH cte AS
(
SELECT *, RANK() OVER ( PARTITION BY personId ORDER BY steps DESC, parentId ) rn
FROM #tmp
)
SELECT DENSE_RANK() OVER ( ORDER BY parentId ) groupId, personId
FROM cte
WHERE rn = 1
ORDER BY groupId, personId
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment