Skip to content

Instantly share code, notes, and snippets.

@calvinclaus
Created November 13, 2020 12:29
Show Gist options
  • Save calvinclaus/95ee4a9ea347688cf4869eb99776cba1 to your computer and use it in GitHub Desktop.
Save calvinclaus/95ee4a9ea347688cf4869eb99776cba1 to your computer and use it in GitHub Desktop.
require 'rails_helper'
require_relative '../../lib/human_name_utils/human_name_utils.rb'
RSpec.describe "HumanNameUtils" do
# Asserts that HumanNameUtils.fuzzy_equal? reports the two names as equal in
# both argument orders.
#
# @param n1 [String] one name
# @param n2 [String] the other name
def e(n1, n2)
  # Run the comparison in both directions; each failure message lists the
  # operands in the order that was actually passed to fuzzy_equal?, so a
  # failure that only happens in the swapped direction can be told apart.
  [[n1, n2], [n2, n1]].each do |left, right|
    expect(HumanNameUtils.fuzzy_equal?(left, right)).to eq(true), "expected #{left} === #{right}"
  end
end
# Asserts that HumanNameUtils.fuzzy_equal? reports the two names as NOT equal
# in both argument orders.
#
# @param n1 [String] one name
# @param n2 [String] the other name
def ne(n1, n2)
  # Run the comparison in both directions; each failure message lists the
  # operands in the order that was actually passed to fuzzy_equal?, so a
  # failure that only happens in the swapped direction can be told apart.
  [[n1, n2], [n2, n1]].each do |left, right|
    expect(HumanNameUtils.fuzzy_equal?(left, right)).to eq(false), "expected #{left} !== #{right}"
  end
end
# cleanup_name examples: the degree suffixes after the name (and the commas
# separating them) are expected to be removed.
it "can cleanup names" do
  {
    "Calvin Claus BSc" => "Calvin Claus",
    "Karin Haberleithner, MSc, MBA" => "Karin Haberleithner",
  }.each do |raw, cleaned|
    expect(HumanNameUtils.cleanup_name(raw)).to eq(cleaned)
  end
end
# Pairs that fuzzy_equal? is expected to accept; the `e` helper checks each
# pair in both argument orders.
it "knows if two names are likely to be equal" do
  [
    ["Calvin Claus BSc", "Calvin Claus"],
    ["Claus Calvin BSc", "Calvin Claus"],
    ["Calvin Claus BSc", "Claus Calvin"],
    ["Shirin Badawi", "Shirin Badawi-Claus"],
    ["Shirin Badawi ❤️", "Shirin Badawi-Claus"],
    ["Hans-Christian K", "Hans-Christian Kern"],
    ["Hans-Christian K.", "Hans-Christian Kern"],
    ["Shirin Badawi", "Shirin Badawi"],
    ["Karl, Kern", "Karl Kern"],
    ["Kern, Karl", "Karl Kern"],
    ["Kern, Karl", "Kern Karl"],
    ["Dr. Hans-Christian Plschek", "Plschek Hans-Christian"],
    ["Dr. med. Heinrich Hechenblaickner", "Heinrich Hechenblaickner"],
  ].each { |left, right| e(left, right) }
end
# Pairs that fuzzy_equal? is expected to reject; the `ne` helper checks each
# pair in both argument orders.
it "knows if two names are likely not equal" do
  [
    ["Christian Claus", "Calvin Claus"],
    ["Julian Bauer", "Patrick Blaha"],
    ["Hans-Christian K.", "Kristof Kern"],
    ["Karl Kern", "Kristof K."],
    ["Karl Kern", "Kristof K"],
  ].each { |left, right| ne(left, right) }
end
# Pairs differing only in umlaut / accent / sharp-s spelling (plus one pair
# with trailing degree suffixes) that fuzzy_equal? is expected to accept.
it "can deal with umlauts + co" do
  [
    ["Peter Bäneton", "Peter Baeneton"],
    ["Peter Baneton", "Peter Baneton"],
    ["Peter Banèton", "Peter Baneton"],
    ["Peter Schußter", "Peter Schusster"],
    ["Karin Haberleithner, MSc, MBA", "Karin Haberleithner"],
  ].each { |left, right| e(left, right) }
end
# Table-driven check of HumanNameUtils.clean_and_split_name.
# Each row is [raw input, expected :first, expected :last]; rows are checked
# in order and the result is compared via #to_h.
it "can clean and split name" do
  [
    # Titles, degrees and decorations around the name
    ["Calvin Claus", "Calvin", "Claus"],
    ["BSc. Calvin Claus", "Calvin", "Claus"],
    ["BSc. Calvin Claus MSc.", "Calvin", "Claus"],
    ["BSc. Shirin Badawi-Claus MSc.", "Shirin", "Claus"],
    ["BSc. shirin badawi", "Shirin", "Badawi"],
    ["☛ Dipl.-Ing.(FH) Stefan Daschek", "Stefan", "Daschek"],
    ["☛ Ing(FH) Stefan Daschek", "Stefan", "Daschek"],
    ["☛ Dipl.-Ing.(FH (aber nur manchmal)) Stefan Daschek", "Stefan", "Daschek"],
    ["☛ Ing Stefan Daschek", "Stefan", "Daschek"],
    ["☛ Ing_ Stefan Daschek", "Stefan", "Daschek"],
    ["Franziska Huber Tcm.", "Franziska", "Huber"],
    ["Franziska Huber M.A.", "Franziska", "Huber"],
    ["Franziska Huber M.Sc.", "Franziska", "Huber"],
    ["Franziska Huber m.sc.", "Franziska", "Huber"],
    ["Franziska_Huber m.sc.", "Franziska", "Huber"],
    ["Franziska Huber | m.sc.", "Franziska", "Huber"],
    ["Franziska Huber M.D.", "Franziska", "Huber"],
    ["Franziska Huber M.A.", "Franziska", "Huber"],
    ["Franziska Huber m.a.", "Franziska", "Huber"],
    ["Franziska Huber ma", "Franziska", "Huber"],
    ["Franziska Huber md.", "Franziska", "Huber"],
    ["Franziska Huber md", "Franziska", "Huber"],
    ["Dr. Franziska Huber md", "Franziska", "Huber"],
    ["dr Franziska Huber md", "Franziska", "Huber"],
    ["dr-Franziska Huber md", "Franziska", "Huber"],
    ["DR-PHIL Franziska Huber md", "Franziska", "Huber"],
    ["dr. phil. Franziska Huber md", "Franziska", "Huber"],
    ["Franziska Huber (random title)", "Franziska", "Huber"],
    ["Franziska H. (random title)", "Franziska", "H."],
    ["Franziska H (random title)", "Franziska", "H."],
    ["Kirsten Waller, M.A.", "Kirsten", "Waller"],
    ["Kirsten Cfalla CFA", "Kirsten", "Cfalla"],
    ["Kirsten Cfalla cfa", "Kirsten", "Cfalla"],
    ["Kirsten Tcmina tcm", "Kirsten", "Tcmina"],
    ["Kirsten Tcmina TCM", "Kirsten", "Tcmina"],
    ["Kirsten Tcmina Tcm", "Kirsten", "Tcmina"],
    ["DI Marcel Nürnberg", "Marcel", "Nürnberg"],
    ["DI Dieter Nürnberg", "Dieter", "Nürnberg"],
    ["Dieter Nürnberg", "Dieter", "Nürnberg"],
    ["Alexander-Peter Nürnberg", "Alexander", "Nürnberg"],
    ["Alexander-Peter Nürnberg-Berger", "Alexander", "Berger"],
    ["Josef-Alexander Haselberger", "Josef", "Haselberger"],
    ["Carine Pottier - Assoc CIPD", "Carine", "Pottier"],
    ["BSC - Carine Pottier - Assoc CIPD", "Carine", "Pottier"],
    ["Keiron Molyneux BA(Hons) FdSc ACMI LCGI", "Keiron", "Molyneux"],
    ["Peter Rees CFCIPD/ MSC", "Peter", "Rees"],
    ["Parveen Baba MCIPD PMP®", "Parveen", "Baba"],
    ["Kate Burrell FCIPD", "Kate", "Burrell"],
    ["Andreas Wende FRICS", "Andreas", "Wende"],
    ["Komm.-Rat DI.Michael Foelsner MBA MPA", "Michael", "Foelsner"],
    ["Dr. Andreas Blaue, MLE", "Andreas", "Blaue"],
    ["Kai Simon, Ph.D.", "Kai", "Simon"],
    ["(¯`*•. Rudolf Engelsberger .•*´¯)", "Rudolf", "Engelsberger"],
    ["¯``*•. Franz Ferdinand BSc. .•*´¯)", "Franz", "Ferdinand"],
    ["★ Rosan Gompers ", "Rosan", "Gompers"],
    ["Dr. Roland Wöss MMBA Akad Vkfm", "Roland", "Wöss"],
    ["Sascha ILIN akad. IM", "Sascha", "Ilin"],
    ["Gerhard Huber Akadfdl", "Gerhard", "Huber"],
    ["Gerhard Wallner akad.FDL", "Gerhard", "Wallner"],
    ["Gerry Schneider, akad. BM.", "Gerry", "Schneider"],
    ["Antonio Artisian - MSs, BSc, MBA", "Antonio", "Artisian"],
    ["Lars Schnidrig, Dipl. VW, MBA, LL.M.", "Lars", "Schnidrig"],
    ["Prim. Univ.Prof. Dr. Nikolaus Schmeller", "Nikolaus", "Schmeller"],
    ["Prim. Priv.-Doz. Dr. Anton Ponholzer, F.E.B.U.", "Anton", "Ponholzer"],
    ["Prim. Univ.Doz. Dr. Lorenz Höltl", "Lorenz", "Höltl"],
    ["HR Univ.Prof. Dr. Helmut Madersbacher", "Helmut", "Madersbacher"],
    ["MR Dr. Maximilian Nachtigall", "Maximilian", "Nachtigall"],
    ["Priv.Doz. Dr. Matthias Johannes Waldert", "Matthias", "Waldert"],
    ["Dr. med. Andreas Franczak, FEBS", "Andreas", "Franczak"],
    ["OA Dr. Alexis Freitas, F.E.B.S.", "Alexis", "Freitas"],
    ["Priv. Doz. DDr. Patrick Nierlich, MBA, FEBVS", "Patrick", "Nierlich"],
    ["Dr. Mohamed Ahmed Ali Salama, M.B.B.CH, MSc", "Mohamed", "Salama"],

    # Last names with particles (von, van, van der, v/d, de, ...)
    ["Diana von Bodungen", "Diana", "von Bodungen"],
    ["Peter van Hausen", "Peter", "van Hausen"],
    ["Peter van der Hausen", "Peter", "van der Hausen"],
    ["Ingrid Tarko - v. d. Linde", "Ingrid", "v. d. Linde"],
    ["Ruben Dijk, van (werk)", "Ruben", "Dijk"],
    ["Myrthe Hessels - v/d Linden", "Myrthe", "v/d Linden"],
    ["Max Van Den Doel", "Max", "van den Doel"],
    ["Rob van den Heuvel", "Rob", "van den Heuvel"],
    ["Eric Jan C. van Putten", "Eric", "van Putten"],
    ["René van der Zel", "René", "van der Zel"],
    ["Marie-Cecile van der Leeuw - van Hasselt", "Marie", "van der Leeuw"],
    ["Maarten van de Koevering", "Maarten", "van de Koevering"],
    ["Dorrith Wijbenga-v.d. Velde", "Dorrith", "v.d. Velde"],
    ["Sake V/d Kloet", "Sake", "v/d Kloet"],
    ["Nanci Hogenboom van het Hof", "Nanci", "van het Hof"],
    ["Merle Koomans v. d. Dries", "Merle", "v. d. Dries"],
    ["Barteld van 't Ende", "Barteld", "van ’t Ende"],
    ["Bas van ' t Klooster", "Bas", "van ’ t Klooster"],
    ["Bert Van 't Net", "Bert", "van ’t Net"],
    ["Marcel Van t Goor", "Marcel", "van t Goor"],
    ["Johan Van ‘ t hul", "Johan", "van ’ t Hul"],
    ["Linda van ‘t Wout Beschikbaar", "Linda", "van ’t Wout"],
    ["Lotte Van 't Klooster-van Halderen", "Lotte", "van ’t Klooster"],
    ["Diane van 't Veen-Scheenstra", "Diane", "van ’t Veen"],
    ["Maarten van der Tol MBA", "Maarten", "van der Tol"],
    ["Hermine Van 'T Wout", "Hermine", "van ’t Wout"],
    ["Jolanda van 't Erve - ten Heuw ", "Jolanda", "van ’t Erve"],
    ["Carlos Van `t Veen", "Carlos", "van ’t Veen"],
    ["Carlos Van 't Veen", "Carlos", "van ’t Veen"],
    ["Carlos Van ’t Veen", "Carlos", "van ’t Veen"],
    ["Carlos Van ‘t Veen", "Carlos", "van ’t Veen"],
    ["Michael in 't Veen", "Michael", "Veen"],
    ["Monique Hogeland-van Outersterp", "Monique", "van Outersterp"],
    ["Irma van Gaalen-Doesburg", "Irma", "van Gaalen"],
    ["Mieke van Akkeren van het acteurs collectief", "Mieke", "van Akkeren"],
    ["Ine de Haas", "Ine", "de Haas"],
    ["Ivan Futters, PHD", "Ivan", "Futters"],
    ["Gerhard Obkircher-Vanhausen", "Gerhard", "Obkircher"],
    ["Gerhard Vanhausen-Obkircher", "Gerhard", "Obkircher"],
    # TODO known failure:
    # ["Paul Hof Van 't", "Paul", "Hof"],
    ["Laurent de la CLERGERIE", "Laurent", "de la Clergerie"],
    ["Arnaud de La Taille", "Arnaud", "de la Taille"],
    ["Guillaume de Montangon", "Guillaume", "de Montangon"],
    # TODO get capitalization right for this
    ["Guillaume de l'Augsburg", "Guillaume", "de L’augsburg"],
    ["Niels-Van-T Land", "Niels", "van-t Land"],
  ].each do |raw, first_name, last_name|
    expect(HumanNameUtils.clean_and_split_name(raw).to_h).to eq(first: first_name, last: last_name)
  end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment