Created
October 25, 2016 14:55
-
-
Save bogdan/a1aa13fb54269627eb0d51dd95fe2f04 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "spec_helper" | |
describe FuzzyStringMatcher do | |
context ".similar?" do | |
# Custom matcher: the value under test is an array that gets splatted into
# FuzzyStringMatcher.similar?; the matcher passes when that call is truthy.
RSpec::Matchers.define :be_similar do
  match do |pair|
    FuzzyStringMatcher.similar?(*pair)
  end
end
# Pairs of strings, one "left, right" pair per heredoc line, parsed into
# two-element arrays. The examples further down assert that every pair here
# IS reported as similar.
# NOTE(review): the name suggests these are matches the team considers
# undesirable but currently accepts as pinned behaviour -- confirm.
#
# Parsing uses only core String/Array methods (no ActiveSupport `blank?`),
# and the result is frozen so the shared fixture cannot be mutated by a spec.
FALSE_POSITIVE = <<-HEREDOC.lines.map(&:strip).reject(&:empty?).map { |line| line.split(", ").freeze }.freeze
  greg.dellolio, doug.dellolio
  siw.myrland, ine.myrland
  saad.khaldi, saad.alhuraib
  kimberly.shifley, kimberly.hubacher
  andrey.demidenko, andrey.zinovyev
  justyna.dobrowolska.phd, justyna.zakaria
  elaine.kolstad, cece.kolstad
  jorden_nickel, jordenwilson
  kelleytompkins, kelley_wilson
  davemanwiller, carolemanwiller
  laura.r.hann, lauradonarski
  sarahgcimino, vmcimino
  jennmueller75, cjmuell
  amneuhaus2015, neuhauam
  katie.cornacchia, mcornach
  ahrenshayden, hahrens
  andrew_crutchfield, shcrutchfield
  sbellott, emile.bellott
  krmarshall, lamarshall2
  bcon, ddcondello
  deleonancy, deleon.maylin
  lynnstubb, ryanstubb
  rakenner, nannykenner
  camoldenburg, mikeoldenburg
  diandra.valentin, a.valentin1224
HEREDOC
# Pairs of strings, one "left, right" pair per heredoc line, parsed into
# two-element arrays. The examples further down assert that every pair here
# is reported as similar.
#
# Parsing uses only core String/Array methods (no ActiveSupport `blank?`),
# and the result is frozen so the shared fixture cannot be mutated by a spec.
POSITIVE = <<-HEREDOC.lines.map(&:strip).reject(&:empty?).map { |line| line.split(", ").freeze }.freeze
  z, z
  zz, fuzzy
  al, also
  foo, foo
  bar, baz
  lol, ololo
  abcdefg, abedcf
  alexander.lee, a.lee
  jwshore, wendy_shore
  kgreen11300, kgreen2
  chelsea.m.jennings, cmjenn
  andreadubbert, adubbert
  yana-moiseeva-2016, yana-moiseeva
  freychuatwork, freychu
  julian.winkler, julian.winkler94
  tonight, tonight13
  natasha.febriana, natasha
  kwetzel, kwetzel12
  kkawaige, kawaige
  maximommy, maximommy2
  suwala, judithsuwala
  larsvinethorsen, larsvine
  rachelwong, rachelwongwc
  thekos7513, thekos
  alisa.phillips17, alisa.phillips
  bradfieldfamily, bradfieldfamily2
  ejrrb, tlsejrrb
  ssproscino, sproscino
  mbuffalino, mbuffalino27
  ellawer, ellawer93
  lalgazy, algazy
  meisi, meisi.yi
  nikopelkonen, niko
  roybalvic, roybalvic74
  lekhaalexy, lekhaalexy1
  zweier, tonyazweier
  valeriegoodwell, valeriegoodwell2
  avery.marc, marc
  kelly, kellybluesky
  sheath114, sheath114.82536b3
  bynuri7, bynuri7449
  sarah, sarah.catlow
  rfh, rfh1987
  wdknight, wdknight12
  chassity.mosley, chassity.mosley76
  amybeadle1, amybeadle
  acaputo, acaputo292
  sjlundstrom, sjlundstrom00
  gina.ladelfa, gina
  bmrkl, bmrkl4
  lucygaribay86, lucy
  porinjin, porinjina
  bmpolen, bmpolen0825
  suzlind, suzlind1312
  caseydoyle16, caseyd
  laurienotaro, laurienotarocreative
  bwhelche, bwhelchel
  angelabrezovsky, abrezovsky
  dipietro, dipietro6938
  talkabletesting1, talkabletesting1+13fr
  kari, kari.halvorsen
  kelleyengstrom, kelley
  gary, garyalto
  tasha, natasha
  tasha, tasha.zozulya
  paula, paulavs89
  justintime6847, justintime684
  christophegoris, christophe
  sophieisberg, sophieisberg8
  jpallarca66, jpallarca
  gemma.d90, gemma
  roberto.coluccio, us.roberto.coluccio
  taylor, taylortraceye
  bkrulo, bkrulo2
  chris.claydon, chris
  jiny04, jiny
  larryleahc, larryleahc2015
  ranikaprasad58, ranikaprasad
  sushant44, sushant
  kcapoccia11, kcapoccia
  talkabletesting1, talkabletesting1+5ad
  hayel, hayel01
  talkabletesting1, talkabletesting1+1fr
  jarboui, mohammed.jarboui
  58949, uk58949
  gymbo, gymboree
  joe.mccourt, joe.mccourt94
  swsunwoo, akswsunwoo
  murpha, murpha16
  baileysarah15, baileysa
  andrew.crowe, andrew
  m.cisneros0729, cisneros0729
  leefamily706, klee
  lori, lorenandannette
  jameson.knop, jsknop
  kelsey.davis, kelseydhanna
  bpayne, blake.payne
  ann.fattore, fattore_ann
  breecruz2005, cruz.bree93
  choiahyoung95, hayoungchoi95
  daveconlon1975, conlondave
  fengyuan66, yuanfeng0810
  fiona_hsiao2002, hsiao.fiona
  katie.pesce, pescekatie
  kua.benjamin, benjamin.kua
  lisawoomer5, woomerlisa
  meril.darees, darees_meril
  neill.hamilton, hamilton.neill
  rebecca.colaiacovo, colaiacovo1990
  rgm_junior, junior_rgm
  samia_hasan, hasansamia
  testevikat, vikatteste
  testvika, vikatest
  tomsvika, vikatoms
  vakufacarijana1, arijanavakufac
  warshawmichael, michael.warshaw
  xin.zhang, zhangxin4189
  zhangheidi5, heidizhang
HEREDOC
# Pairs of strings, one "left, right" pair per heredoc line, parsed into
# two-element arrays. The examples further down assert that every pair here
# is NOT reported as similar.
# NOTE(review): the name suggests these are misses the team would prefer to
# match but currently accepts as pinned behaviour -- confirm.
#
# Parsing uses only core String/Array methods (no ActiveSupport `blank?`),
# and the result is frozen so the shared fixture cannot be mutated by a spec.
FALSE_NEGATIVE = <<-HEREDOC.lines.map(&:strip).reject(&:empty?).map { |line| line.split(", ").freeze }.freeze
  genine.glavich-hawkins, ge9glavic
  elise.s.beck, esbeck011
HEREDOC
# Pairs of strings, one "left, right" pair per heredoc line, parsed into
# two-element arrays. The examples further down assert that every pair here
# is NOT reported as similar.
#
# Parsing uses only core String/Array methods (no ActiveSupport `blank?`),
# and the result is frozen so the shared fixture cannot be mutated by a spec.
NEGATIVE = <<-HEREDOC.lines.map(&:strip).reject(&:empty?).map { |line| line.split(", ").freeze }.freeze
  lisa.rassenti, laura.rassenti
  daniela.grossmann, michael.grossmann
  angela.tucciarelli, teresa.tucciarelli
  ashley.unverferth, amy.unverferth
  mcclanahan_robin, mcclanahan_ashley
  thadmccauley, janetmccauley
  mira_roseman, rob_roseman
  schot.inna, shotlena
  emilykotzan, mike.kotzan
  amanda, samantha
  zz, zy
  zz, yy
  zz, brazzers
  al, alternative
  foo, bar
  lol, omg
  fvie, ten
  mgeere, mdiaz.design
  srminnich, lauren.thai
  zvarnell, donaldrowens
  tori.crofford, olivia.anderson
  marissadaniels89, jenniferlolder
  tinz.lovered, itin.mardahlia
  christinalsanchez, stevenwaew
  saikiranvudutala, sindhuragade
  jeffries.melissa, evangeline.hsiao
  hege_johannessen, jeanetteunni
  ronettacolton, katonyaormond
  jessica.shaw, mickellshaw
  norinehendricks, janerosenewcomb
  klangstonpr, artistryinmotion
  castro.arilene, carmanwg
  sandyjkorthuis, ademeyer
  sairazbadat, farihajawed0322
  caroline_w98, anna_1997
  ufkarla, krauset43
  htran1557, tn25921
  kristentcho, scottmkato
  allykins001, taryn22_81
  sisselt, ingfridel
  anh.le2, maianh139
  rachnye88, dnyer11
  npartlow, npercival121086
  ctempestjones, sara.teewinot
  ryan.au, andrew.patterson.3001
  cheriej22, dcardinal7523
  cassmmrs, ashley_mcfarling
  brianpchsu, beatriceguardregister
  lilianyan2014, johnyan29
  mmurphree1984, brandon.murphree
  agarwalraghu1, sorabhhamirwasia
  sgoldstein97, dalysdoesnails
  sreevidya.kanagala, ishayerra
  jcthomas1995, cathleen.thomas4
  estrella2024, mayram2908
  patriciak.smith, chattykim
  keith, keenen
  lexa_in_america, julia.bakker
  roxana.cornejo10, alondra_morataya
  nscsks, dylan.schacht
  martha.airhart, mjmarsack
  courtcourt2015, glima2015
  ashleylyncakes, ashgar819
  crazy_mer, craig.sandra.h
  helly_m, heidi_sk93
  aprilelyse, maylone
  dl.luca, l.loprieno
  shah92malav, shahnidhi91
  roberts.jessicam, jenbroberts
  nmachesney, mernestm
  trish_8980, tsherlin
  beccaablanton, carolannpierce
  diane.lee, ryan
  calliek, mcornell78
  katie-templeton, tammianderson
  angelicameastman, michaelstonge12
  topherbook, gobrentboles
  madisonodell, mistero111
  rerdil, kathryntbailey
  mikerobson79, catemmerson
  carminuccio, jodiecamillo
  vaibhav.chidrewar, herambgadekar
  mai.bocquet, monica-chao
  flyfisher26, amiller2204
  meredithmorse611, danmorse21
  andben795, yanoshek
  gogobecky_gaming, beth_roan
  fataya16, haydeen11
  andrey.demidenko, zagorodniy.vladimir
  travis, katrina.petryk
  wonderdoyin, adeniyiaderukuola
  khushinder.singh, savaliyahenish
  eliecheng, wahooiee
  boredchickinstlouis, squirrelmonk
  annkulak, mariannak
  lindsey.hallett, kimberly.denton
  mikenomura, camille.vachon
  alexis_carillo, lisspoto13
  ryan.lints, erica.metzger
  emma.p.johannes, destinee.a.rea
  jlahlum, katihavlicek
  kelly.malcher, ashley.reid.elizabeth
  sharon.bea, ybarezer
  nnanna_213, acomor21
  ekdaugherty, ninakherrera
  wsasseville, walter-tz1mkl
  shann79green, reisdelaney
  gizzlejonasx3, galloway918
  gabrielkbkim, planeteer2k
  leopardchik90, nielsonfamily001
  michelle.means, kristinmariemeans
  lisa_ehle, usethis66
  whiteangel93, benpgauthier
  dkyank, yankeyjessica
  niamhperrotta, johnperrotta78
  brendacarlile9, craigcarlile
  janhenrikskisland, marenskisland
  marjorierodetis, demirodetis
  markwstrauss, rhondalstrauss
  daracshelley, tammyshelley
  heatherboynton, carolboynton
  iris.teeuwen, carlateeuwen
  bianca_lewis, taralewis8
  rachelaunderwood, melissa.underwood87
  kristagerrity, christophergerrity
  ilie.balmus, nina.balmus
  jaabshire714, meabshire621
HEREDOC
# Each row of the fixture CSV becomes one example that expects the row's
# values NOT to be similar. All space characters are removed from the raw
# file contents before CSV parsing. The path is relative, so it is resolved
# against the working directory the specs are run from.
fixture_path = 'spec/fixtures/files/controversial_emails_for_similarity_check.csv'
fixture_rows = CSV.parse(File.read(fixture_path).delete(' '))
fixture_rows.each do |row|
  it { expect(row).not_to be_similar }
end
# One example per pair. These datasets must be reported as similar
# (same run order as before: POSITIVE, then FALSE_POSITIVE).
[POSITIVE, FALSE_POSITIVE].each do |dataset|
  dataset.each do |pair|
    it { expect(pair).to be_similar }
  end
end

# These datasets must be reported as not similar
# (FALSE_NEGATIVE, then NEGATIVE).
[FALSE_NEGATIVE, NEGATIVE].each do |dataset|
  dataset.each do |pair|
    it { expect(pair).not_to be_similar }
  end
end
# Pairs containing an empty string are expected not to be similar,
# including the pair of two empty strings.
[["", ""], ["zz", ""]].each do |pair|
  it { expect(pair).not_to be_similar }
end
# Values made of digits, optionally behind a short letter prefix.
context 'chinese emails' do
  # Identical numbers
  it { expect(%w[7328757346 7328757346]).to be_similar }

  # Only a few digits differ
  it { expect(%w[7328757346 7328757357]).to be_similar }
  it { expect(%w[7328417346 7328757346]).to be_similar }

  # Totally different numbers
  it { expect(%w[7328757346 4758657796]).not_to be_similar }

  # The same two numbers behind a shared two-letter prefix
  it { expect(%w[ab7328757346 ab4758657796]).to be_similar }

  # The same two numbers behind a shared prefix of more than 2 letters
  it { expect(%w[abc7328757346 abc4758657796]).to be_similar }
end
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment