Skip to content

Instantly share code, notes, and snippets.

@bduggan
Created March 24, 2016 16:02
Show Gist options
  • Save bduggan/de8a7d319638c19d1d4d to your computer and use it in GitHub Desktop.
Save bduggan/de8a7d319638c19d1d4d to your computer and use it in GitHub Desktop.
/*
* CREATE EXTENSION pg_trgm;
*
* recursive portion taken directly from:
* http://www.postgresql.org/docs/9.5/static/queries-with.html
*/
WITH RECURSIVE search_graph(id, link, name, depth, path, cycle) AS (
SELECT g.id, g.link, g.name, 1,
ARRAY[g.id],
false
FROM graph g
UNION ALL
SELECT g.id, g.link, g.name, sg.depth + 1,
path || g.id,
g.id = ANY(path)
FROM graph g, search_graph sg
WHERE g.id = sg.link AND NOT cycle
),
test_leads as (
select * from leads limit 200
),
graph as (
select l.id as id, m.id as link, m.name
from test_leads l inner join test_leads m
on similarity(m.name, l.name) > 0.5 and l.id != m.id
)
SELECT id,
array_agg(distinct(name)) as names,
array_agg(distinct(link)) as links
FROM search_graph
group by 1
having count(distinct(name)) > 1
id | names | links
------+------------------------------------------------------------------------------------+------------------
2614 | {"MSS Technologies, Inc.","United Technologies"} | {2506,2572}
2628 | {"GA State University","Georgia State University","The University of Georgia"} | {2637,2668,2670}
2637 | {"GA State University","Georgia Statue University","The University of Georgia"} | {2628,2668,2670}
2668 | {"Georgia State University","Georgia Statue University","University System of GA"} | {2628,2637,2649}
2670 | {"Georgia State University","Georgia Statue University"} | {2628,2637}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment