Last active
June 24, 2020 00:40
-
-
Save caldempsey/6bb625e185334b16509a7a909a18ffbb to your computer and use it in GitHub Desktop.
Write 10 million UUIDs to a text file in 6 seconds using Golang (stdlib)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bytes" | |
"fmt" | |
uuid "github.com/satori/go.uuid" | |
"log" | |
"os" | |
"runtime" | |
"sync" | |
"time" | |
) | |
// As it turns out, the main bottleneck is https://github.com/golang/go/issues/19563.
// A custom UUID implementation that does not use rand.Read _is_ faster.
// Tunables for the UUID generator.
const (
	// numCPUs is handed to runtime.GOMAXPROCS by main. A value below 1 (such
	// as -1) leaves the runtime's current setting unchanged, so the default
	// processor count is used.
	numCPUs = -1

	// numUUIDs is the total number of UUIDs to generate and write.
	numUUIDs = 10000000

	// UUIDLen is the size in bytes of one raw (binary) UUID.
	UUIDLen = 16
)
func main() { | |
t := time.Now() | |
var file, _ = os.Create("out.txt") // For read access. | |
err := file.Truncate(int64((UUIDLen * numUUIDs) + (2 * numUUIDs))) | |
if err != nil { | |
log.Fatalln(err) | |
} | |
runtime.GOMAXPROCS(numCPUs) | |
if quotient, remainder := divmod(numUUIDs, int64(runtime.GOMAXPROCS(-1))); quotient+remainder <= 0 { | |
log.Fatalln("It must be possible to partition the number of UUID writes across cores") | |
} | |
numWorkers := runtime.GOMAXPROCS(-1) * 2 | |
fmt.Printf("Writing to out.txt with GOMAXPROCS of %v using %v workers\n", runtime.GOMAXPROCS(-1), numWorkers) | |
// Ingest | |
var writesProducers sync.WaitGroup | |
// Partition the number of workers for run-queues to approximately double the number of run-queues made available. | |
quotient, remainder := divmod(numUUIDs, int64(numWorkers)) | |
results := make([][]byte, numWorkers) | |
for w := 0; w < numWorkers; w++ { | |
writesProducers.Add(1) | |
n := int(quotient) | |
if w+1 == numCPUs { | |
n += int(remainder) | |
} | |
go uuidWriteProducer(&writesProducers, n, results, w)() | |
} | |
writesProducers.Wait() | |
// Results | |
var writesConsumer sync.WaitGroup | |
for i := range results { | |
var offset int64 | |
i := i | |
if i != 0 { | |
for o := 0; o <= i; o++ { | |
offset += int64(len(results[o])) | |
} | |
} else { | |
offset = int64(len(results[i])) | |
} | |
go func() { | |
writesConsumer.Add(1) | |
_, err := file.WriteAt(results[i], offset) | |
if err != nil { | |
panic(err) | |
} | |
writesConsumer.Done() | |
}() | |
} | |
writesConsumer.Wait() | |
fmt.Printf("Done in %f seconds!\n", time.Since(t).Seconds()) | |
} | |
func uuidWriteProducer(writesProducers *sync.WaitGroup, numWorkers int, results [][]byte, idx int) func() { | |
return func() { | |
runtime.LockOSThread() | |
defer writesProducers.Done() | |
buf := &bytes.Buffer{} | |
buf.Grow((UUIDLen * numWorkers) + (2 * numWorkers)) | |
for i := 0; i < numWorkers; i++ { | |
uuid := uuid.NewV4().Bytes() | |
buf.Write(bytes.Join([][]byte{uuid, []byte( "\n")}, nil)) | |
} | |
results[idx] = buf.Bytes() | |
runtime.UnlockOSThread() | |
} | |
} | |
// divmod divides numerator by denominator using Go's truncating integer
// division and returns the quotient together with the remainder.
// It panics if denominator is zero.
func divmod(numerator, denominator int64) (quotient, remainder int64) {
	return numerator / denominator, numerator % denominator
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Can store a slice of bytes in place and write it, or use a buffered/unbuffered channel.