Skip to content

Instantly share code, notes, and snippets.

@coyove
Created July 10, 2018 09:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save coyove/35cf5398b60642b9452e5a09d9615ef9 to your computer and use it in GitHub Desktop.
Save coyove/35cf5398b60642b9452e5a09d9615ef9 to your computer and use it in GitHub Desktop.
package main
import (
"io/ioutil"
"log"
"runtime"
"sort"
"strings"
"time"
)
// search scores every document in x against the query s and returns the
// matching documents, best score first.
//
// The query is split on single spaces. A token that starts with '#', '@' or
// '$' is a tag and is matched through its hashstr value. Any other token
// contributes one key per pair of adjacent runes, each rune truncated to 16
// bits and packed as hi<<16 + lo. Documents in x are slices of such keys.
//
// For each document, score is the number of distinct query bigrams it
// contains and total is the number of distinct query keys (bigrams plus
// tags). A document is reported when it contains at least one query tag, or
// when score is non-zero and greater than total/2. When opt is '&', a
// document that lacks any of the query tags is dropped regardless of its
// bigram score; any other opt value leaves tags optional.
//
// Each result packs byte(score/total*255) into the top 8 bits and the low 24
// bits of the document index into the rest. Results with equal scores keep
// document order.
func search(s string, x [][]uint32, opt byte) []uint32 {
	// Both maps go from a query key to the stamp (document index + 1) of the
	// last document in which that key was counted; 0 means "not yet counted".
	// Stamping makes each key count at most once per document without having
	// to reset the maps between documents.
	grams := make(map[uint32]int)
	tags := make(map[uint32]int)
	for _, tok := range strings.Split(s, " ") {
		if strings.HasPrefix(tok, "#") || strings.HasPrefix(tok, "@") || strings.HasPrefix(tok, "$") {
			tags[hashstr(tok)] = 0
			continue
		}
		rs := []rune(tok)
		for i := 0; i+1 < len(rs); i++ {
			grams[uint32(uint16(rs[i]))<<16+uint32(uint16(rs[i+1]))] = 0
		}
	}

	// NOTE(review): tags are part of the denominator although only bigrams
	// add to score, so a query with tags can never reach a score byte of 255.
	total := len(grams) + len(tags)

	res := make([]uint32, 0)
	for i, doc := range x {
		stamp := i + 1
		score, tagHits := 0, 0
		for _, key := range doc {
			if last, ok := grams[key]; ok {
				// Count a repeated bigram only once; otherwise score could
				// exceed total and overflow the byte conversion below.
				if last != stamp {
					grams[key] = stamp
					score++
				}
				continue
			}
			if last, ok := tags[key]; ok && last != stamp {
				tags[key] = stamp
				tagHits++
			}
		}

		flag := tagHits > 0
		if opt == '&' && tagHits < len(tags) {
			// At least one required tag is missing from this document.
			score, flag = 0, false
		}

		if flag || (score > 0 && score > total/2) {
			// score <= len(grams) <= total, so the ratio stays within [0, 1].
			f := byte(float64(score) / float64(total) * 255)
			res = append(res, uint32(f)<<24+uint32(i&0x00ffffff))
		}
	}

	sort.SliceStable(res, func(i, j int) bool {
		return byte(res[i]>>24) > byte(res[j]>>24)
	})
	return res
}
// hashstr returns a 32-bit hash of s with the most significant bit forced on,
// so the result is never zero.
//
// The hash is FNV-1 style (multiply by the prime, then XOR), but it is fed
// one rune at a time rather than one byte at a time.
func hashstr(s string) uint32 {
	const (
		offsetBasis uint32 = 2166136261
		prime       uint32 = 16777619
		topBit      uint32 = 0x80000000
	)

	h := offsetBasis
	for _, r := range s {
		h = (h * prime) ^ uint32(r)
	}
	return h | topBit
}
// main loads 1.csv, builds an in-memory index with one entry per line, and
// times a single search over that index.
//
// It logs, in order: the heap size before loading, the heap size after
// indexing and a forced GC, the number of results for the query " ", and the
// elapsed search time in milliseconds. The program exits with an error if
// 1.csv cannot be read.
func main() {
	mt := &runtime.MemStats{}
	runtime.ReadMemStats(mt)
	log.Println(mt.HeapAlloc)

	buf, err := ioutil.ReadFile("1.csv")
	if err != nil {
		// Without the file the index would be empty and the timing below
		// meaningless, so stop instead of continuing silently.
		log.Fatalf("reading 1.csv: %v", err)
	}

	x := [][]uint32{}
	titles := []string{}
	for _, line := range strings.Split(string(buf), "\n") {
		titles = append(titles, line)

		// Drop the first comma-separated field, then treat the remaining
		// fields as one space-separated list of words.
		parts := strings.Split(line, ",")
		parts = strings.Split(strings.Join(parts[1:], " "), " ")

		r := make([]uint32, 0, len(parts)*4)
		for _, part := range parts {
			// Words starting with '#', '@' or '$' are stored as a single
			// hash; other words are stored as one key per adjacent rune pair.
			if strings.HasPrefix(part, "#") || strings.HasPrefix(part, "@") || strings.HasPrefix(part, "$") {
				r = append(r, hashstr(part))
				continue
			}
			y := []rune(part)
			for i := 0; i < len(y)-1; i++ {
				r = append(r, uint32(uint16(y[i]))<<16+uint32(uint16(y[i+1])))
			}
		}
		sort.Slice(r, func(i, j int) bool { return r[i] < r[j] })
		x = append(x, r)
	}

	runtime.GC()
	runtime.ReadMemStats(mt)
	log.Println(mt.HeapAlloc)

	start := time.Now()
	log.Println("==", len(search(" ", x, '|')))
	log.Println(time.Since(start).Nanoseconds() / 1e6)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment