Skip to content

Instantly share code, notes, and snippets.

@febuiles
Last active February 29, 2024 07:17
Show Gist options
  • Save febuiles/c9c825d234018305bf0410a5148c23c0 to your computer and use it in GitHub Desktop.
Save febuiles/c9c825d234018305bf0410a5148c23c0 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"io/ioutil"
"math/rand"
"os"
"time"
"unicode/utf8"
)
// main times a single utf8.Valid call over a ~2 MiB input. It generates a
// scratch file of printable ASCII that ends in one invalid byte sequence,
// reads the file back into memory, validates it, and prints the result
// together with the time the validation took.
func main() {
	// Seed the global generator so every run produces different content.
	rand.Seed(time.Now().UnixNano())

	path := "gibberish.txt"
	// Register cleanup before anything can fail: deferred calls still run
	// while a panic unwinds, so the scratch file is removed on every path,
	// not only after a successful measurement.
	defer os.Remove(path)

	// 2 MiB of valid bytes with the only invalid sequence at the very end:
	// the validator has to scan the whole input before it can answer, which
	// is the worst case for utf8.Valid.
	if err := writeFile(path, 2*1024*1024); err != nil {
		panic(err)
	}

	data, err := ioutil.ReadFile(path)
	if err != nil {
		panic(err)
	}

	// Time only the validation itself; file generation and reading are
	// deliberately outside the measured region.
	start := time.Now()
	valid := utf8.Valid(data)
	duration := time.Since(start)

	fmt.Printf("UTF-8 valid: %t\n", valid)
	fmt.Printf("Time taken: %s\n", duration)
}
// writeFile creates (or truncates) the file at path and fills it with size
// random printable ASCII bytes (32-126 inclusive), followed by the two bytes
// 0xF0 0x41, which do not form a valid UTF-8 sequence. The resulting file is
// therefore size+2 bytes long. A size of zero or less writes only the
// trailing invalid sequence.
//
// It returns the first error encountered while creating, writing or closing
// the file.
func writeFile(path string, size int) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	// Report a failed Close unless an earlier error is already being
	// returned: on a file opened for writing, Close can be the place where
	// a write failure finally surfaces.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	// Fill a reusable buffer and write it in chunks instead of issuing one
	// Write call per byte. The number and order of rand.Intn calls is the
	// same as a byte-at-a-time loop, so the content for a given seed is
	// unchanged.
	const chunkSize = 64 * 1024
	buf := make([]byte, chunkSize)
	for remaining := size; remaining > 0; {
		n := chunkSize
		if remaining < n {
			n = remaining
		}
		chunk := buf[:n]
		// only use bytes we know are utf-8 valid
		for i := range chunk {
			chunk[i] = byte(rand.Intn(95) + 32) // 32-126 inclusive
		}
		if _, err := file.Write(chunk); err != nil {
			return err
		}
		remaining -= n
	}

	// add one invalid sequence at the end
	if _, err := file.Write([]byte{0xF0, 0x41}); err != nil {
		return err
	}
	return nil
}
@febuiles
Copy link
Author

febuiles commented Feb 29, 2024

~/w/dgp (yarn-parser)
$ go run foo.go
UTF-8 valid: false
Time taken: 214.916µs

~/w/dgp (yarn-parser)
$ go run foo.go
UTF-8 valid: false
Time taken: 248.625µs

~/w/dgp (yarn-parser)
$ go run foo.go
UTF-8 valid: false
Time taken: 195.458µs

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment