Skip to content

Instantly share code, notes, and snippets.

@julienschmidt
Last active June 9, 2016 20:17
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save julienschmidt/7178960 to your computer and use it in GitHub Desktop.
Save julienschmidt/7178960 to your computer and use it in GitHub Desktop.
Find all sub-sequence permutations of a given word in a dictionary
package main
import (
"bytes"
"fmt"
"io/ioutil"
"strings"
)
var index64 = [64]int{
0, 1, 48, 2, 57, 49, 28, 3,
61, 58, 50, 42, 38, 29, 17, 4,
62, 55, 59, 36, 53, 51, 43, 22,
45, 39, 33, 30, 24, 18, 12, 5,
63, 47, 56, 27, 60, 41, 37, 16,
54, 35, 52, 21, 44, 32, 23, 11,
46, 26, 40, 15, 34, 20, 31, 10,
25, 14, 19, 9, 13, 8, 7, 6,
}
func readDict(path string, word string) (map[string]bool, error) {
buf, err := ioutil.ReadFile(path)
if err != nil {
return nil, err
}
// Either just use this:
// (much shorter but also slower)
/*
dict := make(map[string]bool)
b := bytes.Split(buf, []byte("\n"))
for i := range b {
dict[string(bytes.ToLower(b[i]))] = true
}
return dict, nil
*/
// Or the following:
var first [256]bool
for i := 0; i < len(word); i++ {
first[word[i]] = true
}
for i, upper := 0, strings.ToUpper(word); i < len(word); i++ {
first[upper[i]] = true
}
indexBitmap := make([]uint64, (len(buf)+63)/64)
count := 0
for i := 0; i < len(buf); i++ {
// Find the next line break
pos := bytes.IndexByte(buf[i:], '\n')
if pos <= len(word) {
if pos < 0 {
break
}
if first[buf[i]] {
count++
}
}
i += pos
// Save pos in bitmap
indexBitmap[i/64] |= 1 << (uint(i) % 64)
}
dict := make(map[string]bool, count)
offset := 0
for i, bm := range indexBitmap {
for bm > 0 {
// Get the least significant set bit (=1)
lsb := bm & -bm
// Remove the LSB from the bitmap
bm ^= lsb
// Get index from LSB with 64bit DeBruijn multiplication table
// This could be done even faster using Assembly:
// http://en.wikipedia.org/wiki/Find_first_set
index := index64[(lsb*0x03f79d71b4cb0a89)>>58] + i*64
if index - offset <= len(word) && first[buf[offset]] {
dict[string(bytes.ToLower(buf[offset:index]))] = true
}
offset = index + 1
}
}
return dict, nil
}
func permSubseq(dict map[string]bool, prefix, str string) (p []string) {
var charPermuted [256]bool
for i := 0; i < len(str); i++ {
if !charPermuted[str[i]] {
nStr := prefix + str[i:i+1]
if dict[nStr] {
p = append(p, nStr)
}
if len(str) > 1 {
p = append(p, permSubseq(dict, nStr, str[:i]+str[i+1:])...)
}
charPermuted[str[i]] = true
}
}
return p
}
func main() {
str := "racecardss"
dict, err := readDict("/usr/share/dict/words", str)
if err != nil {
return
}
p := permSubseq(dict, "", str)
fmt.Println(p)
fmt.Println(len(p))
}
@anfernee
Copy link

58 index := index64[(lsb_0x03f79d71b4cb0a89)>>58] + i_64

This is brilliant. though I am wondering where does those magic numbers (0x03f79d71b4cb0a89, 58) come from?

@julienschmidt
Copy link
Author

This is a De Bruijn sequence

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment