Skip to content

Instantly share code, notes, and snippets.

@rsms
Last active October 27, 2020 19:33
Show Gist options
  • Save rsms/d6ab4ab2ee784936e0883621f6841be6 to your computer and use it in GitHub Desktop.
Save rsms/d6ab4ab2ee784936e0883621f6841be6 to your computer and use it in GitHub Desktop.
Sortable efficient universally unique identifier in Go — PUBLISHED HERE: https://github.com/rsms/go-uuid
/* ISC License
Copyright (c) 2020, Rasmus Andersson <rsms.me>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
package suuid
import (
"crypto/rand"
math_rand "math/rand"
"time"
)
/*
Id is a universally unique identifier (UUID)
Ids are binary sortable.
The first 6 bytes constitutes a millisecond-precision timestamp in big-endian byte order.
Data layout:
Byte 0-3 timestamp second, big endian
Byte 4-5 timestamp millisecond, big endian
Byte 6-15 random
Data layout example:
00 31 04 39 02 c9 39 ce 14 6c 0b db a1 40 77 78
~~~~~~~~~~~ ----- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| | Random bytes
| |
| 713 milliseconds
|
3212345 seconds since 2020-09-13 12:26:40
= 2020-10-20 16:45:45.713 UTC
Note that while it seems like we could use nanosecond for the timestamp to reduce the
random data needed, environments like JavaScript doesn't neccessarily provide high-precision
clocks. Doing things this way means that we can generate and parse the embedded timestamp
in a wide variety of programming languages.
*/
type Id [16]byte
// ZeroId is the zero Id (must not be modified)
var ZeroId Id
// MaxId is the largest possible Id (must not be modified)
var MaxId = Id{
255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255,
}
// idEpochBase offsets the timestamp to provide a wider range.
// Effective range (0x0–0xFFFFFFFF): 2020-09-13 12:26:40 – 2156-10-20 18:54:55 (UTC)
const idEpochBase int64 = 1600000000
// GenID generates a universally unique Id suitable to be used for sorted identity
func GenId() Id {
var id Id
t := time.Now()
sec := uint32(t.Unix() - idEpochBase)
ns := uint64(t.Nanosecond())
ms := uint16(ns / uint64(time.Millisecond))
// second part
id[0] = byte(sec >> 24)
id[1] = byte(sec >> 16)
id[2] = byte(sec >> 8)
id[3] = byte(sec)
// millisecond part
id[4] = byte(ms >> 8)
id[5] = byte(ms)
// Use middle bytes of nanosecond to reduce need for random bytes.
// We pick the middle bytes so that we don't have to know the endianess of the host.
// Note that Windows uses a low-res timer for time.Now (Oct 2020)
// See https://go-review.googlesource.com/c/go/+/227499/ + github issue for discussion,
// see https://go-review.googlesource.com/c/go/+/227499/1/src/testing/time_windows.go for patch.
id[6] = byte(ns >> 24)
id[7] = byte(ns >> 16)
// rest are random bytes
if _, err := rand.Read(id[8:16]); err != nil {
// If crypto/rand fails, fall back to pseudo random number generator.
// This is fine since the id is not used for anything critical and its uniqueness
// is eventually verified (i.e. when inserting into a database.)
math_rand.Read(id[8:16])
}
return id
}
// MakeId creates a new Id with specific Unix timestamp and random bytes.
//
// nsec is the nanosecond part of the timestamp and should be in the range [0, 999999999].
// It's valid to pass values outside this range for nsec. Only the millisecond part of nsec
// is actually used.
//
// To create an Id with a time.Time object, do this:
// MakeId(t.Unix(), t.Nanosecond(), random)
//
// Up to 10 bytes is used from random.
// If len(random) < 10, the remaining "random" bytes of Id are zero.
//
func MakeId(sec int64, nsec int, random []byte) Id {
var id Id
s := uint32(sec - idEpochBase)
ms := uint16(nsec / int(time.Millisecond))
// second part
id[0] = byte(s >> 24)
id[1] = byte(s >> 16)
id[2] = byte(s >> 8)
id[3] = byte(s)
// millisecond part
id[4] = byte(ms >> 8)
id[5] = byte(ms)
copy(id[6:], random)
return id
}
// IdFromString decodes a string representation of an Id (i.e. from String() or EncodeString())
func IdFromString(encoded string) Id {
var id Id
id.DecodeString([]byte(encoded))
return id
}
// String returns a string representation of the Id.
// The returned string is sortable with the same order as the "raw" Id bytes and is URL safe.
func (id Id) String() string {
var buf [22]byte
n := id.EncodeString(buf[:])
return string(buf[n:])
}
// Time returns the time portion of the Id
func (id Id) Time() time.Time {
sec, ms := id.Timestamp()
return time.Unix(int64(sec)+idEpochBase, int64(ms)*int64(time.Millisecond))
}
// Timestamp returns the timestamp portion of the Id
func (id Id) Timestamp() (sec uint32, millisec uint16) {
sec = uint32(id[0])<<24 | uint32(id[1])<<16 | uint32(id[2])<<8 | uint32(id[3])
millisec = uint16(id[4])<<8 | uint16(id[5])
return
}
/*
EncodeString and DecodeString have been adapted from the ksuid project,
licensed as follows:
MIT License
Copyright (c) 2017 Segment.io
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
const base62Characters = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
// EncodeString writes the receiver to dst which must be at least 22 bytes.
// Returns the start offset (this function starts writing at the end of dst.)
func (id Id) EncodeString(dst []byte) int {
const srcBase = 0x100000000
const dstBase = 62
parts := [4]uint32{
uint32(id[0])<<24 | uint32(id[1])<<16 | uint32(id[2])<<8 | uint32(id[3]),
uint32(id[4])<<24 | uint32(id[5])<<16 | uint32(id[6])<<8 | uint32(id[7]),
uint32(id[8])<<24 | uint32(id[9])<<16 | uint32(id[10])<<8 | uint32(id[11]),
uint32(id[12])<<24 | uint32(id[13])<<16 | uint32(id[14])<<8 | uint32(id[15]),
}
n := len(dst)
bp := parts[:]
bq := [4]uint32{}
dst[0] = '0'
for len(bp) != 0 {
quotient := bq[:0]
remainder := uint64(0)
for _, c := range bp {
value := uint64(c) + uint64(remainder)*srcBase
digit := value / dstBase
remainder = value % dstBase
if len(quotient) != 0 || digit != 0 {
quotient = append(quotient, uint32(digit))
}
}
// Writes at the end of the destination buffer because we computed the
// lowest bits first.
n--
dst[n] = base62Characters[remainder]
bp = quotient
}
return n
}
// DecodeString sets the receiving Id to the decoded value of src, which is expected to be a
// string previously encoded using EncodeString (base62 0-9A-Za-z)
func (id *Id) DecodeString(src []byte) {
const srcBase = 62
const dstBase = 0x100000000
parts := [22]byte{}
partsIndex := 21
for i := len(src); i > 0; {
// offsets into base62Characters
const offsetUppercase = 10
const offsetLowercase = 36
i--
b := src[i]
switch {
case b >= '0' && b <= '9':
b -= '0'
case b >= 'A' && b <= 'Z':
b = offsetUppercase + (b - 'A')
default:
b = offsetLowercase + (b - 'a')
}
parts[partsIndex] = b
partsIndex--
}
n := len(id)
bp := parts[:]
bq := make([]byte, 0, len(src))
for len(bp) > 0 {
quotient := bq[:0]
remainder := uint64(0)
for _, c := range bp {
value := uint64(c) + uint64(remainder)*srcBase
digit := value / dstBase
remainder = value % dstBase
if len(quotient) != 0 || digit != 0 {
quotient = append(quotient, byte(digit))
}
}
id[n-4] = byte(remainder >> 24)
id[n-3] = byte(remainder >> 16)
id[n-2] = byte(remainder >> 8)
id[n-1] = byte(remainder)
n -= 4
bp = quotient
}
var zero [16]byte
copy(id[:n], zero[:])
}
/* ISC License
Copyright (c) 2020, Rasmus Andersson <rsms.me>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
package suuid
import (
"testing"
"time"
)
func TestId(t *testing.T) {
assert := /*testutil.*/NewAssert(t)
assert.Eq("ZeroId encoding", ZeroId.String(), "0")
assert.Eq("MaxId encoding", MaxId.String(), "7n42DGM5Tflk9n8mt7Fhc7")
smallId := Id{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2}
id1 := GenId()
id2 := GenId()
// t.Logf("id1: %032x %q", id1[:], id1)
// t.Logf("id2: %x %q", id2[:], id2)
assert.Eq("ZeroId decode(encode())", ZeroId, IdFromString(ZeroId.String()))
assert.Eq("MaxId decode(encode())", MaxId, IdFromString(MaxId.String()))
assert.Eq("smallId decode(encode())", smallId, IdFromString(smallId.String()))
assert.Eq("id1 decode(encode())", id1, IdFromString(id1.String()))
assert.Eq("id1 decode(encode())", id1, IdFromString(id1.String()))
assert.Eq("id2 decode(encode())", id2, IdFromString(id2.String()))
// DecodeString: Check that DecodeString rewrites all bytes, not just some
var id3 Id
assert.Eq("id3", ZeroId, id3)
id3.DecodeString([]byte("7n42DGM5Tflk9n8mt7Fhc7"))
assert.Eq("id3 DecodeString should work", id3, MaxId)
id3.DecodeString([]byte("A"))
assert.Eq("id3 should be zeroed", id3, Id{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xA})
// Check that Time() and Timestamp() returns the correct time as encoded
tm := time.Unix(1600000000+3212345, 123*1000000) // 2020-10-20 16:45:45.123 UTC
idt := MakeId(tm.Unix(), tm.Nanosecond(), []byte{})
// t.Logf("tm %s", tm.UTC())
// t.Logf("idt %x", idt[:])
tssec, tsms := idt.Timestamp()
assert.Eq("Timestamp() tssec", tssec, uint32(3212345))
assert.Eq("Timestamp() tsms", tsms, uint16(123))
// must check with rounded time since Id timestamp has only millisecond precision
assert.Eq("Time()", idt.Time().Unix(), tm.Unix())
assert.Eq("Time()",
idt.Time().Nanosecond()/int(time.Millisecond),
tm.Nanosecond()/int(time.Millisecond))
}
/* ISC License
Copyright (c) 2020, Rasmus Andersson <rsms.me>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
package suuid
import (
"bytes"
"fmt"
"reflect"
"regexp"
"runtime/debug"
"strings"
"testing"
)
type Assert struct {
T *testing.T
// note: T instead of implicit since testing.T has function like Error that may easily cause
// mistakes, i.e. type assert.Error("thing", "msg", err) would not do what you think.
}
func NewAssert(t *testing.T) Assert {
return Assert{t}
}
func (a Assert) Ok(assertionfmt string, ok bool, v ...interface{}) bool {
a.T.Helper() // mark this function as a helper (for stack traces)
if !ok {
a.T.Errorf("FAIL: (assertion) "+assertionfmt, v...)
}
return ok
}
func (a Assert) Err(assertionfmt, errsubstr string, err error, v ...interface{}) bool {
a.T.Helper()
assertionfmt = "FAIL: " + assertionfmt
if err == nil {
a.T.Errorf(assertionfmt+"\nexpected error with substring %q\n(no error)",
append(v, errsubstr)...)
return false
}
if !strings.Contains(strings.ToLower(err.Error()), strings.ToLower(errsubstr)) {
a.T.Errorf(
assertionfmt+"\nExpected error to contain substring %q (case-insensitive)\nGot %q",
append(v, errsubstr, err.Error())...)
return false
}
return true
}
func (a Assert) NoErr(assertionfmt string, err error, v ...interface{}) bool {
a.T.Helper()
if err != nil {
a.T.Errorf("FAIL: "+assertionfmt+"; error: %v", append(v, err)...)
}
return err == nil
}
// Eq checks that value1 and value2 are equal.
//
// Accepts the following types of values:
// - Anything type that you can compare with "==" in Go (bool, int, float, string, etc.)
// - Pointer types (e.g. *struct)
// - []byte
//
func (a Assert) Eq(assertionfmt string, value1, value2 interface{}, v ...interface{}) bool {
a.T.Helper()
if value1 == nil && value2 == nil {
return true
}
if value1 == nil || value2 == nil {
a.T.Errorf("FAIL: "+assertionfmt+"\nvalue1: %s\nvalue2: %s",
append(v, Repr(value1), Repr(value2))...)
return false
}
v1, v2 := reflect.ValueOf(value1), reflect.ValueOf(value2)
t1, t2 := v1.Type(), v2.Type()
if t1 != t2 {
a.T.Errorf("FAIL: "+assertionfmt+"; types differ:\nvalue1: %T\nvalue2: %T",
append(v, value1, value2)...)
return false
}
if v1.CanAddr() && v1.UnsafeAddr() == v2.UnsafeAddr() {
return true
}
eq := false
if t1.Comparable() {
eq = value1 == value2
} else {
switch v1 := value1.(type) {
case []byte:
eq = bytes.Equal(v1, value2.([]byte))
default:
a.T.Errorf("FAIL: "+assertionfmt+"; assert.Eq can not compare values of type %T",
append(v, value1)...)
return false
}
}
if !eq {
a.T.Errorf("FAIL: "+assertionfmt+"\nvalue1: %s\nvalue2: %s",
append(v, Repr(value1), Repr(value2))...)
return false
}
return true
}
func (a Assert) Panic(expectedPanicRegExp string, f func()) bool {
a.T.Helper()
ok := false
// Note: (?i) makes it case-insensitive
expected := regexp.MustCompile("(?i)" + expectedPanicRegExp)
defer func() {
a.T.Helper()
if v := recover(); v != nil {
panicMsg := fmt.Sprint(v)
if ok = expected.MatchString(panicMsg); !ok {
a.T.Log(string(debug.Stack()))
a.T.Errorf("expected panic to match %q but got %q", expectedPanicRegExp, panicMsg)
}
} else {
a.T.Log(string(debug.Stack()))
a.T.Errorf("expected panic (but there was no panic)")
}
}()
f()
return ok
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment