Skip to content

Instantly share code, notes, and snippets.

@kolemannix
Created February 23, 2023 17:10
Show Gist options
  • Save kolemannix/4e53c10666086244702e8cc567a03823 to your computer and use it in GitHub Desktop.
Save kolemannix/4e53c10666086244702e8cc567a03823 to your computer and use it in GitHub Desktop.
Timestamp-based 64-bit IDs with sharding and random elements
import scala.util.Random
object IdGeneration {
private[id] val OUR_EPOCH = 1546344000000L // 2019-01-01T12:00:00Z
private[id] val FIFTEEN_BIT_MAX: Int = 0x8000
private def randomComponent15Bit(): Int = {
Random.nextInt(FIFTEEN_BIT_MAX)
}
private[id] val TWENTY_THREE_BIT_MAX: Int = 0x800000
private def randomComponent23Bit(): Int = {
Random.nextInt(TWENTY_THREE_BIT_MAX)
}
private[id] val SHARD_MASK = 0x7f8000L
private[id] val SHARD_SHIFT = 15
private[id] val TIME_MASK = 0xffffffffff800000L
private[id] val TIME_SHIFT = 23
private[id] val FORTY_ONE_BIT_MAX = 0x20000000000L
private def timeComponent(timeMillis: Long): Long = {
(timeMillis - OUR_EPOCH) % FORTY_ONE_BIT_MAX
}
private[id] val EIGHT_BIT_MAX = 256
/** Generates a timestamp-based random ID, based off the current time. 41 bits of the ID are based on the timestamp, which defaults to the current time in
* milliseconds The other 23 are random
*/
def generateRandomId(): Long = {
generateRandomIdWithTime(System.currentTimeMillis())
}
/** Generates a timestamp-based random ID. 41 bits of the ID are based on the timestamp, which defaults to the current time in milliseconds The other 23 are
* random
* @param timeMillis
* The time to use, defaults to current time
*/
def generateRandomIdWithTime(timeMillis: Long): Long = {
val time = timeComponent(timeMillis)
val random = randomComponent23Bit()
// time 41 | random 23
(time << TIME_SHIFT) | random
}
/** Generates a timestamp-based ID.
* - 41 bits of the ID are based on the timestamp, which defaults to the current time in milliseconds
* - 8 bits are based on the shardInput parameter, which is ideally a hashcode or identifier of some related entity or object. For example, if this ID is
* for a chat message, a hash of the id of the user who sent the message. This can be used to reduce collisions, since no user will chat thousands of
* times per millisecond.
* - 15 remaining bits are random
*/
def generateShardedIdWithTime(shardInput: Int, timeMillis: Long): Long = {
val time = timeComponent(timeMillis)
val shard = Math.abs(shardInput) % EIGHT_BIT_MAX
val random = randomComponent15Bit()
// time 41 | shard 8 | random 15
(time << TIME_SHIFT) | (shard << SHARD_SHIFT) | random
}
/** Generates a timestamp-based ID.
* - 41 bits of the ID are based on the timestamp, which defaults to the current time in milliseconds
* - 8 bits are based on the shardInput parameter, which is ideally a hashcode or identifier of some related entity or object
* - 15 remaining bits are random
*/
def generateShardedId(shardInput: Int): Long = {
generateShardedIdWithTime(shardInput, System.currentTimeMillis())
}
}
object Base62Long {
final val GMP_ALPHABET_62: Array[Char] = Array(
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
)
final val GMP_ALPHABET_62_MAP: Map[Char, Int] = GMP_ALPHABET_62.zipWithIndex.toMap
private final val POWS_62: Map[Int, Long] = Map(
(0, 1L),
(1, 62L),
(2, 3844L),
(3, 238328L),
(4, 14776336L),
(5, 916132832L),
(6, 56800235584L),
(7, 3521614606208L),
(8, 218340105584896L),
(9, 13537086546263552L),
(10, 839299365868340224L),
)
/** Encodes the given value, interpreted as an unsigned 64-bit integer, to a base62 String representation
*/
def encode(input: Long): String = {
if (input == 0L) {
return "0"
}
var remaining = input
// The longest these will ever be is 11 characters (the encoding of 0xFFFFFFFFFFFFFFFF: LygHa16AHYF)
val buffer = new StringBuffer(16)
var iterations = 0
while (remaining != 0) {
val quotient = java.lang.Long.divideUnsigned(remaining, 62)
val remainder = java.lang.Long.remainderUnsigned(remaining, 62)
val character = GMP_ALPHABET_62(remainder.toInt)
buffer.append(character)
remaining = quotient
// Guard against (mathematically impossible) infinite loops
if (iterations > 1000) {
throw new Exception("Detected infinite loop in Base62Long.encode")
}
iterations = iterations + 1
}
buffer.reverse.toString
}
/** Decodes the given value, assumed to be a String of characters from the GMP Base62 alphabet, back to its 64-bit long reperesentation
*/
def decode(input: String): Long = {
if (input.length > 11) {
throw new IllegalArgumentException(s"Too large input: ${input}")
}
var result = 0L
var digit = 0
input.reverseIterator.foreach { c =>
val index =
try { GMP_ALPHABET_62_MAP(c) }
catch {
case _: NoSuchElementException => throw new IllegalArgumentException(s"Illegal base62 input: ${input}")
}
val exp =
try { POWS_62(digit) }
catch {
case _: NoSuchElementException => throw new IllegalArgumentException(s"Too large input: ${input}")
}
val added = index * exp
result += added
digit = digit + 1
}
result
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment