Skip to content

Instantly share code, notes, and snippets.

@caldempsey
Last active June 24, 2020 00:40
Show Gist options
  • Save caldempsey/6bb625e185334b16509a7a909a18ffbb to your computer and use it in GitHub Desktop.
Save caldempsey/6bb625e185334b16509a7a909a18ffbb to your computer and use it in GitHub Desktop.
Write 10 million UUIDs to a text file in 6 seconds using Golang (stdlib)
package main
import (
"bytes"
"fmt"
uuid "github.com/satori/go.uuid"
"log"
"os"
"runtime"
"sync"
"time"
)
// As it turns out the main bottleneck is https://github.com/golang/go/issues/19563.
// A custom UUID implementation which does not call rand.Read _is_ faster.
const (
	// numCPUs is the value handed to runtime.GOMAXPROCS at start-up. Per the
	// runtime documentation a value below 1 (such as -1) leaves the current
	// setting untouched, so the runtime's own default stays in effect.
	numCPUs = -1

	// numUUIDs is the total number of UUIDs to generate across all workers.
	numUUIDs = 10000000

	// UUIDLen is the per-UUID byte count used when pre-sizing storage for the
	// generated UUIDs.
	UUIDLen = 16
)
func main() {
t := time.Now()
var file, _ = os.Create("out.txt") // For read access.
err := file.Truncate(int64((UUIDLen * numUUIDs) + (2 * numUUIDs)))
if err != nil {
log.Fatalln(err)
}
runtime.GOMAXPROCS(numCPUs)
if quotient, remainder := divmod(numUUIDs, int64(runtime.GOMAXPROCS(-1))); quotient+remainder <= 0 {
log.Fatalln("It must be possible to partition the number of UUID writes across cores")
}
numWorkers := runtime.GOMAXPROCS(-1) * 2
fmt.Printf("Writing to out.txt with GOMAXPROCS of %v using %v workers\n", runtime.GOMAXPROCS(-1), numWorkers)
// Ingest
var writesProducers sync.WaitGroup
// Partition the number of workers for run-queues to approximately double the number of run-queues made available.
quotient, remainder := divmod(numUUIDs, int64(numWorkers))
results := make([][]byte, numWorkers)
for w := 0; w < numWorkers; w++ {
writesProducers.Add(1)
n := int(quotient)
if w+1 == numCPUs {
n += int(remainder)
}
go uuidWriteProducer(&writesProducers, n, results, w)()
}
writesProducers.Wait()
// Results
var writesConsumer sync.WaitGroup
for i := range results {
var offset int64
i := i
if i != 0 {
for o := 0; o <= i; o++ {
offset += int64(len(results[o]))
}
} else {
offset = int64(len(results[i]))
}
go func() {
writesConsumer.Add(1)
_, err := file.WriteAt(results[i], offset)
if err != nil {
panic(err)
}
writesConsumer.Done()
}()
}
writesConsumer.Wait()
fmt.Printf("Done in %f seconds!\n", time.Since(t).Seconds())
}
// uuidWriteProducer returns the body of one producer goroutine.
//
// When run, the returned function calls uuid.NewV4 numWorkers times (despite
// its name, numWorkers is the number of UUIDs this producer generates, not a
// worker count), appends the bytes of each UUID followed by a single '\n' to
// an in-memory buffer, and stores the buffer's contents in results[idx]. It
// calls writesProducers.Done when it finishes; the caller is responsible for
// the matching Add. Each producer must be given a distinct idx so that the
// goroutines never write to the same element of results.
func uuidWriteProducer(writesProducers *sync.WaitGroup, numWorkers int, results [][]byte, idx int) func() {
	return func() {
		defer writesProducers.Done()
		runtime.LockOSThread()
		// Deferred so the thread is released even if generation panics.
		defer runtime.UnlockOSThread()

		var buf bytes.Buffer
		// Reserve the whole chunk up front: UUIDLen bytes per UUID plus one
		// newline each (assumes Bytes() yields UUIDLen bytes; a mismatch only
		// costs a reallocation, not correctness).
		buf.Grow((UUIDLen + 1) * numWorkers)
		for i := 0; i < numWorkers; i++ {
			// Write the UUID and the separator directly instead of joining
			// them into a temporary slice on every iteration.
			id := uuid.NewV4().Bytes()
			buf.Write(id)
			buf.WriteByte('\n')
		}
		results[idx] = buf.Bytes()
	}
}
// divmod returns the truncated integer quotient of numerator divided by
// denominator together with the remainder of that division. As with Go's
// built-in operators, a zero denominator panics.
func divmod(numerator, denominator int64) (quotient, remainder int64) {
	return numerator / denominator, numerator % denominator
}
@caldempsey
Copy link
Author

caldempsey commented Jun 23, 2020

This could instead store each slice of bytes in place and write it, or use a buffered/unbuffered channel.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment