Skip to content

Instantly share code, notes, and snippets.

@dmarkow
Last active December 20, 2015 17:48
Show Gist options
  • Save dmarkow/6170832 to your computer and use it in GitHub Desktop.
Save dmarkow/6170832 to your computer and use it in GitHub Desktop.
Levenshtein distance of company names
with foo as (select company,
length(company),
levenshtein('Company Name', company) from contacts)
select company from contacts where
company in
(select company from foo where company != contacts.company and
(
select (1 - (cast(levenshtein as real)/greatest(length(contacts.company),length)))
) > 0.8
);
with foo as (select company,
length(company),
levenshtein('Company Name', company) from contacts)
select company from contacts where
(select count(*) from foo where company != contacts.company and
(
select (1 - (cast(levenshtein as real)/greatest(length(contacts.company),length)))
) > 0.8
) > 0;
with foo as (select company,
length(company),
levenshtein('Company Name', company) from contacts)
select company from contacts where
(select count(id) from contacts as sub where sub.company != contacts.company and
(
select (1 - (cast(levenshtein(contacts.company, sub.company) as real)/greatest(length(contacts.company),length(sub.company))))
) > 0.8
) > 0;
select count(company) from contacts where company != 'Sedgwick Detert Moran & Arnold LLP' and id in
(select id from contacts as sub where
(
select (1 - (cast(levenshtein('Sedgwick Detert Moran & Arnold LLP', sub.company) as real)/greatest(length('Sedgwick Detert Moran & Arnold LLP'),length(sub.company))))
) > 0.50
);
with main as (select distinct(company) from contacts order by company)
select company from main where company != '' and company is not null and (select count(company) from contacts where company != main.company and id in
(select id from contacts as sub where
(
select (1 - (cast(levenshtein(main.company, sub.company) as real)/greatest(length(main.company),length(sub.company))))
) > 0.80
)) > 0 limit 50;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment