Last active
June 28, 2016 00:23
-
-
Save joemfb/d033e9205208604f53345a7ff029f6b9 to your computer and use it in GitHub Desktop.
brute force and KMP string search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
:: brute force and KMP string search | |
:: arms accessible in dojo after /+ string-srch | |
:: try test:string-srch | |
:: | |
:::: /hoon/string-srch/lib | |
:: | |
/? 314 | |
!: | |
:::: ~novlen-hanweb | |
:: | |
|% | |
++ types | |
|% | |
++ part {nedl/(list) hstk/(list)} | |
++ full {nedl/(list) hstk/(list) strt/@ud} | |
++ args ?(part full) | |
++ kmp {nedl/(list) pfxs/(list @ud) strt/@ud hstk/(list)} | |
++ kmnr {j/@ud pfxs/(list @ud) hstk/(list) nedl/(list)} | |
-- | |
++ test | |
=+ a==+([a="a" b=14] |-(?~(b (weld a "b") $(a (weld a a), b (dec b))))) | |
=+ b="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ipsum, ipsum ipsum." | |
~& [(brut "aaaaaaab" a) (brut "ipsum" b)] | |
~& [(brat "aaaaaaab" a) (brat "ipsum" b)] | |
~& [(kmpm "aaaaaaab" a) (kmpm "ipsum" b)] | |
~& [(kmpa "aaaaaaab" a) (kmpa "ipsum" b)] | |
~ | |
:: | |
++ args :: parse w/ opt strt | |
|* * ^- full:types | |
?: ?=(^ +<+>) | |
[nedl=(limo +<-) hstk=(limo +<+) strt=0] | |
[nedl=(limo +<-) hstk=(zing (limo ~[+<+<])) strt=(@ud +<+>)] | |
++ brut :: brute first | |
|= args:types | |
=+ (args +<) | |
=. hstk (slag strt hstk) | |
=+ i=0 | |
|- ^- (unit @ud) | |
=+ [n=nedl h=hstk] :: shadow for nr-loop | |
|- | |
?: |(?=($~ n) ?=($~ h)) | |
~ | |
?: =(i.n i.h) | |
?~ t.n | |
`(add strt i) | |
$(n t.n, h t.h) | |
^$(i +(i), hstk +.hstk) | |
++ brat :: brute all | |
|= args:types | |
=+ (args +<) | |
=. hstk (slag strt hstk) | |
=+ i=0 | |
=| fnd/(list @ud) | |
|- ^+ fnd | |
=+ [n=nedl h=hstk] | |
|- | |
?: |(?=($~ n) ?=($~ h)) | |
(flop fnd) | |
?: =(i.n i.h) | |
?~ t.n | |
^$(i +(i), hstk +.hstk, fnd [(add strt i) fnd]) | |
$(n t.n, h t.h) | |
^$(i +(i), hstk +.hstk) | |
:: | |
:: Knuth-Morris-Pratt | |
:: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm | |
:: https://www.ics.uci.edu/~eppstein/161/960227.html | |
:: | |
++ kmpm | |
|= args:types | |
=+ (args +<) | |
(kmp-skmf nedl (kmp-pfxs nedl) strt hstk) | |
++ kmpa | |
|= args:types | |
=+ (args +<) | |
(kmp-skma nedl (kmp-pfxs nedl) strt hstk) | |
++ kmp-skmf :: kmp first | |
|= kmp:types | |
=. hstk (slag strt hstk) | |
=+ [i=0 j=0] | |
=+ len=(lent nedl) | |
|- ^- (unit @ud) | |
?: |(?=($~ nedl) ?=($~ hstk)) | |
~ | |
=+ n=(kmp-nr j pfxs hstk nedl) | |
?: &(=(len +(j.n)) m.n) | |
`(add strt (sub i j.n)) | |
$(i +(i), hstk +.hstk, j (add j.n !m.n)) | |
++ kmp-skma :: kmp all | |
|= kmp:types | |
=. hstk (slag strt hstk) | |
=+ [i=0 j=0] | |
=+ len=(lent nedl) | |
=| fnd/(list @ud) | |
|- ^+ fnd | |
?: |(?=($~ nedl) ?=($~ hstk)) | |
(flop fnd) | |
=+ n=(kmp-nr j pfxs hstk nedl) | |
?: &(=(len +(j.n)) m.n) | |
=+ f=(add strt (sub i j.n)) | |
$(i +(i), hstk +.hstk, j (snag j.n pfxs), fnd [f fnd]) | |
$(i +(i), hstk +.hstk, j (add j.n !m.n)) | |
++ kmp-nr :: kmp nr-loop | |
|= kmnr:types :: seek bw thru pfxs | |
|- ^- {j/@ud m/?} | |
=+ ^= m .=(-.hstk (snag j nedl)) | |
?: |(=(0 j) m) | |
[j m] | |
$(j (snag (dec j) pfxs)) | |
++ kmp-pfxs :: kmp nedl pfixs | |
|= nedl/(list) | |
=+ pfxs=(limo ~[0]) | |
^+ pfxs | |
?: (lth (lent nedl) 2) pfxs :: hacky, but ?~ nedl | |
=+ [i=0 n=+.nedl] :: doesn't keep type | |
|- ?~ n pfxs | |
=. i |- | |
=+ m=.=(-.n (snag i nedl)) | |
?: |(=(0 i) m) | |
(add i !m) | |
$(i (snag (dec i) pfxs)) | |
$(i i, n +.n, pfxs (welp pfxs (limo ~[i]))) | |
-- |
It's worth noting what's in other std libraries:
Java just does brute force: http://www.docjar.com/html/api/java/lang/String.java.html#1571
V8 does brute force, heuristically upgrading to Boyer-Moore: https://github.com/v8/v8/blob/master/src/string-search.h#L283
glibc uses Two-Way: https://sourceware.org/git/?p=glibc.git;a=blob;f=string/str-two-way.h;h=87ed8a03668ce113db7d364dba3e96d69b516de9;hb=HEAD
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Brute force
++brut
++brat
KMP
++kmpm
++kmpa
Note: KMP implemented unsigned math. Signed version already exists here: https://github.com/urbit/examples/blob/master/libs/strings-kmp/lib/strings-kmp.hoon
Each of those gates take two lists,
nedl
andhstk
, with an optionalstrt
atom for the number of list elements to skip.Note: version 2 works for searching nested lists, but I broke that with
++args
...