Last active
June 24, 2020 01:55
-
-
Save caldempsey/6e079150668f06e4ed518df22d5e8361 to your computer and use it in GitHub Desktop.
Write 10 million UUIDs to a text file is 0.2 seconds using Golang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bytes" | |
"fmt" | |
"github.com/rogpeppe/fastuuid" | |
"log" | |
"os" | |
"runtime" | |
"sync" | |
"time" | |
) | |
/* | |
Having benchmarked Golang's built in UUID library which leans on its ability to interface with kernal-interface | |
system calls the results showed when working across millions of calls, they added up to be roughly 30x more expensive | |
than not doing so (so expensive by some polynomial time). This bottleneck is raised here https://github.com/golang/go/issues/19563. | |
We find fastuuid is faster because it only reads from syscall randomly once; the rest is pseudorandomly generated from its once-read random implementation | |
whilst this produces valid UUIDV4 forms, a sly fox might notice a pattern in the UUIDs as they are deterministic. | |
Between this implementation and uuid_io.go (another gist), we save approximately 6 seconds on a modern-user system by avoiding https://linux.die.net/man/8/genrandom | |
*/ | |
// If set to -1 will attempt to evaluate the number of runqueues across all cores | |
const numCPUs = -1 | |
const numUUIDs = 10000000 | |
const UUIDLen = 16 | |
var g = fastuuid.MustNewGenerator() | |
func main() { | |
t := time.Now() | |
var file, _ = os.Create("out.txt") // For read access. | |
err := file.Truncate(int64((UUIDLen * numUUIDs) + (2 * numUUIDs))) | |
if err != nil { | |
log.Fatalln(err) | |
} | |
runtime.GOMAXPROCS(numCPUs) | |
if quotient, remainder := divmod(numUUIDs, int64(runtime.GOMAXPROCS(-1))); quotient+remainder <= 0 { | |
log.Fatalln("It must be possible to partition the number of UUID writes across cores") | |
} | |
numWorkers := runtime.GOMAXPROCS(-1) * 2 | |
fmt.Printf("Writing to out.txt with GOMAXPROCS of %v using %v workers\n", runtime.GOMAXPROCS(-1), numWorkers) | |
// Ingest | |
var writesProducers sync.WaitGroup | |
// Partition the number of workers for run-queues to approximately double the number of run-queues made available. | |
quotient, remainder := divmod(numUUIDs, int64(numWorkers)) | |
results := make([][]byte, numWorkers) | |
for w := 0; w < numWorkers; w++ { | |
writesProducers.Add(1) | |
n := int(quotient) | |
if w+1 == numCPUs { | |
n += int(remainder) | |
} | |
go uuidWriteProducer(&writesProducers, n, results, w)() | |
} | |
writesProducers.Wait() | |
// Results | |
var writesConsumer sync.WaitGroup | |
for i := range results { | |
var offset int64 | |
i := i | |
if i != 0 { | |
for o := 0; o <= i; o++ { | |
offset += int64(len(results[o])) | |
} | |
} else { | |
offset = int64(len(results[i])) | |
} | |
go func() { | |
// ITS TIME TO GO FAST | |
runtime.LockOSThread() | |
writesConsumer.Add(1) | |
_, err := file.WriteAt(results[i], offset) | |
if err != nil { | |
panic(err) | |
} | |
writesConsumer.Done() | |
}() | |
} | |
writesConsumer.Wait() | |
fmt.Printf("Done in %f seconds!\n", time.Since(t).Seconds()) | |
} | |
func uuidWriteProducer(writesProducers *sync.WaitGroup, numWorkers int, results [][]byte, idx int) func() { | |
return func() { | |
defer writesProducers.Done() | |
buf := &bytes.Buffer{} | |
buf.Grow((UUIDLen * numWorkers) + (2 * numWorkers)) | |
for i := 0; i < numWorkers; i++ { | |
newlineDeliUUID := g.Hex128() + "\n" | |
buf.Write([]byte(newlineDeliUUID)) | |
} | |
results[idx] = buf.Bytes() | |
} | |
} | |
func divmod(numerator, denominator int64) (quotient, remainder int64) { | |
quotient = numerator / denominator // integer division, decimals are truncated | |
remainder = numerator % denominator | |
return | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment