Skip to content

Instantly share code, notes, and snippets.

@17twenty
Created October 27, 2021 21:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 17twenty/b382695ff8132f311d843413486c5807 to your computer and use it in GitHub Desktop.
Save 17twenty/b382695ff8132f311d843413486c5807 to your computer and use it in GitHub Desktop.
Use base62, UUID and the birthday paradox to create shorter unique IDs.
package main
import (
"encoding/hex"
"fmt"
"log"
"math"
"strconv"
"strings"
"time"
"github.com/gofrs/uuid"
"github.com/pkg/errors"
)
// References
// https://medium.com/i-math/the-birthday-problem-307f31a9ac6f
// https://eager.io/blog/how-long-does-an-id-need-to-be/
// That would require 25 bits (log2 of the number of seconds in a year).
// What we want to do is take the bits needed to represent (seconds into the current year) + (remaining bits of random data from UUID)
func main() {
startOfYear := time.Date(time.Now().Year(), 0, 0, 0, 0, 0, 0, time.Now().Location())
since := uint64(time.Since(startOfYear) / time.Second) // number of seconds since January 1, Current Year time
fmt.Println("Seconds since", since)
t, err := uuid.NewV4()
if err != nil {
log.Fatalln(err)
}
t.Bytes()
fmt.Printf("%08b\n", t.Bytes())
result := uint64(t.Bytes()[0])<<56 | uint64(t.Bytes()[1])<<48 | uint64(t.Bytes()[2])<<40 | uint64(t.Bytes()[3])<<32 | since
fmt.Printf("%08b\n", result)
fmt.Printf("%08b \n", t.Bytes())
encoded := toBase62(uint64(result))
fmt.Println(">>", encoded)
// Now reverse the process
decoded, err := fromBase62(encoded)
if err != nil {
log.Fatalln("Broken", err)
}
// Grab it back by stealing the first 25 bits
sinceDecoded := decoded & ((1 << 25) - 1)
log.Println("Time should be...", sinceDecoded)
}
const encodingChunkSize = 2
// no of bytes required in base62 to represent hex encoded string value of length encodingChunkSize
// given by formula :: int(math.Ceil(math.Log(math.Pow(16, 2*encodingChunkSize)-1) / math.Log(62)))
const decodingChunkSize = 3
func Encode(str string) string {
var encoded strings.Builder
inBytes := []byte(str)
byteLength := len(inBytes)
for i := 0; i < byteLength; i += encodingChunkSize {
chunk := inBytes[i:minOf(i+encodingChunkSize, byteLength)]
s := hex.EncodeToString(chunk)
val, _ := strconv.ParseUint(s, 16, 64)
w := padLeft(toBase62(val), "0", decodingChunkSize)
encoded.WriteString(w)
}
return encoded.String()
}
func Decode(encoded string) (string, error) {
decodedBytes := []byte{}
for i := 0; i < len(encoded); i += decodingChunkSize {
chunk := encoded[i:minOf(i+decodingChunkSize, len(encoded))]
val, err := fromBase62(chunk)
if err != nil {
return "", err
}
chunkHex := strconv.FormatUint(val, 16)
dst := make([]byte, hex.DecodedLen(len([]byte(chunkHex))))
_, err = hex.Decode(dst, []byte(chunkHex))
if err != nil {
return "", errors.Wrap(err, "malformed input")
}
decodedBytes = append(decodedBytes, dst...)
}
s := string(decodedBytes)
return s, nil
}
func minOf(a int, b int) int {
if a < b {
return a
}
return b
}
func padLeft(str, pad string, length int) string {
for len(str) < length {
str = pad + str
}
return str
}
const (
base uint64 = 62
characterSet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
)
func toBase62(num uint64) string {
encoded := ""
for num > 0 {
r := num % base
num /= base
encoded = string(characterSet[r]) + encoded
}
return encoded
}
func fromBase62(encoded string) (uint64, error) {
var val uint64
for index, char := range encoded {
pow := len(encoded) - (index + 1)
pos := strings.IndexRune(characterSet, char)
if pos == -1 {
return 0, errors.New("invalid character: " + string(char))
}
val += uint64(pos) * uint64(math.Pow(float64(base), float64(pow)))
}
return val, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment