Skip to content

Instantly share code, notes, and snippets.

@vessenes
Last active April 9, 2021 00:14
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save vessenes/ce7d1254ad92c6d2a996 to your computer and use it in GitHub Desktop.
Save vessenes/ce7d1254ad92c6d2a996 to your computer and use it in GitHub Desktop.
lock free pointer free golang hash table implementation. DO NOT USE. UNTESTED.
package main
import (
"errors"
"flag"
"log"
"os"
"runtime"
"strconv"
"sync"
"time"
"github.com/spaolacci/murmur3"
)
type Record struct {
Key [35]byte
Index int32
}
type MyHash struct {
Table []Record
Size uint32
}
func (hash *MyHash) Init(size int) {
hash.Size = uint32(size)
hash.Table = make([]Record, hash.Size)
}
func (hash *MyHash) Keys() [][35]byte {
var keys [][35]byte
for i := 0; i < len(hash.Table); i++ {
if hash.Table[i].Key != [35]byte{} {
keys = append(keys, hash.Table[i].Key)
}
}
return keys
}
func (hash *MyHash) Get(key [35]byte) int32 {
offset := murmur3.Sum32(key[:]) % hash.Size
nullkey := [35]byte{}
for i := uint32(0); ; i++ {
if hash.Table[(offset+i)%hash.Size].Key == key {
return hash.Table[(offset+i)%hash.Size].Index
} else if hash.Table[(offset+i)%hash.Size].Key == nullkey {
return 0
}
}
}
func (hash *MyHash) Set(key [35]byte, value int32) (int32, error) {
seen_zero := 0
offset := murmur3.Sum32(key[:]) % hash.Size
nullkey := [35]byte{}
tries := 0
for i := uint32(0); ; i++ {
if (offset+i)%hash.Size == 0 {
seen_zero++
}
if seen_zero > 1 {
return int32(0), errors.New("Hash table full. We looked and looked, couldn't find a spot.")
}
if hash.Table[(offset+i)%hash.Size].Key == key {
hash.Table[(offset+i)%hash.Size].Index = value
return hash.Table[(offset+i)%hash.Size].Index, nil
} else if hash.Table[(offset+i)%hash.Size].Key == nullkey {
trySet:
hash.Table[(offset+i)%hash.Size].Index = value
hash.Table[(offset+i)%hash.Size].Key = key
if key == hash.Table[(offset+i)%hash.Size].Key && value == hash.Table[(offset+i)%hash.Size].Index {
// everything is awesome, we're done
return hash.Table[(offset+i)%hash.Size].Index, nil
} else if key != hash.Table[(offset+i)%hash.Size].Key {
// someone clobbered our key. Be polite and try again.
log.Println("Someone clobbered our key. We're going to try again. Try", tries, "nesting..for", string(key[:]), value)
return hash.Set(key, value)
} else if value != hash.Table[(offset+i)%hash.Size].Index && key == hash.Table[(offset+i)%hash.Size].Key {
// this is our key. Fuck 'em.
log.Println("Someone clobbered our value. This is OUR key. writing again for the ", tries, "time")
tries += 1
goto trySet
}
hash.Table[(offset+i)%hash.Size].Index = value
hash.Table[(offset+i)%hash.Size].Key = key
return hash.Table[(offset+i)%hash.Size].Index, nil
}
}
}
var (
pressure = flag.Int("pressure", 5, "Amount of write pressure to apply.")
tablesize = flag.Int("size", 3e7, "Hash Table Size")
numkeys = flag.Int("keys", 2.5e7, "Number of Keys. Go larger than tablesize to check full table behaviour.")
maxprocs = flag.Int("maxprocs", runtime.NumCPU()*6, "Max threads. Defaults to 6 * core")
)
func main() {
flag.Parse()
log.Println("Commencing test with pressure", *pressure, "table size", *tablesize, "keys", *numkeys)
very_start := time.Now()
runtime.GOMAXPROCS(*maxprocs)
myhash := MyHash{}
myhash.Init(*tablesize)
numthreads := runtime.NumCPU() * 6
numsets := *numkeys / numthreads
var wg sync.WaitGroup
start := time.Now()
for threadnum := 0; threadnum < numthreads; threadnum++ {
wg.Add(1)
//log.Println("goroutine for", threadnum, "with numsets", numsets)
go func(i int) {
var mykey [35]byte
for counter := (numsets * i); counter < numsets*(i+1+*pressure); counter++ {
mykey = [35]byte{}
copy(mykey[:], "heyy"+strconv.Itoa(counter))
_, err := myhash.Set(mykey, int32(counter))
if err != nil {
log.Println("Error setting:", err)
os.Exit(1)
}
}
wg.Done()
}(threadnum)
}
wg.Wait()
log.Println("Done filling hashtable with ", numsets*numthreads, "items. Took", time.Since(start), "to fill")
var mykey [35]byte
for i := int32(*tablesize / 2); i < int32(*tablesize/2)+5; i++ {
mykey = [35]byte{}
start = time.Now()
copy(mykey[:], "heyy"+strconv.Itoa(int(i)))
_ = myhash.Get(mykey)
log.Println("Lookup took", time.Since(start))
}
start = time.Now()
log.Println("Commencing Garbage Collection.")
runtime.GC()
log.Println("Done. Took", time.Since(start))
start = time.Now()
log.Println("Commencing Second Garbage Collection.")
runtime.GC()
log.Println("Done. Took", time.Since(start))
log.Println("All done with test hash containing", len(myhash.Table), ". Total runtime", time.Since(very_start))
}
@tgulacsi
Copy link

This is absolutely not goroutine-safe!

gthomas@waterhouse:/tmp/x$ go run -race main.go -size=2000 -keys=1000                           
2015/01/24 11:57:51 Commencing test with pressure 5 table size 2000 keys 1000
==================
WARNING: DATA RACE
Write by goroutine 6:
  main.(*MyHash).Set()
      /tmp/x/main.go:66 +0x3d0
  main.func·001()
      /tmp/x/main.go:123 +0x1e5

Previous write by goroutine 5:
  main.(*MyHash).Set()
      /tmp/x/main.go:70 +0x698
  main.func·001()
      /tmp/x/main.go:123 +0x1e5

Goroutine 6 (running) created at:
  main.main()
      /tmp/x/main.go:130 +0x855

Goroutine 5 (running) created at:
  main.main()
      /tmp/x/main.go:130 +0x855
==================
2015/01/24 11:57:51 Done filling hashtable with  984 items. Took 626.943097ms to fill
2015/01/24 11:57:51 Lookup took 10.079µs
2015/01/24 11:57:51 Lookup took 9.526µs
2015/01/24 11:57:51 Lookup took 8.563µs
2015/01/24 11:57:51 Lookup took 7.197µs
2015/01/24 11:57:51 Lookup took 7.173µs
2015/01/24 11:57:51 Commencing Garbage Collection.
2015/01/24 11:57:51 Done. Took 1.2923ms
2015/01/24 11:57:51 Commencing Second Garbage Collection.
2015/01/24 11:57:51 Done. Took 733.568µs
2015/01/24 11:57:51 All done with test hash containing 2000 . Total runtime 631.425613ms
Found 1 data race(s)
exit status 66

@vessenes
Copy link
Author

The reason it's not thread safe is that writing an int32 isn't guaranteed atomic by go. But, for this use case, we don't mind a garbage value being written. It would be far slower to lock and unlock. Instead, if a garbage int32 is written half by one thread and half by the other, the checks further down the function will sort it out; providing the key was fully written by one thread or the other, one or the other will claim the slot and rewrite.

If the threads clobbered the [35]byte key by writing simultaneously, they will both move on to another slot leaving some garbage in the table.

Whether this is a good compromise in exchange for huge speedups, I leave to you. But note, I do say in big letters not to use it at the top. :)

@vessenes
Copy link
Author

On further thinking, I believe it's necessary to use a Compare and Swap on key setting to ensure that only one thread has the slot. In go, that means you'll probably be using numeric keys. You could use pointers, but for a large table you will end up with significant GC worries as the number of pointers grows.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment