Created
February 23, 2023 17:10
-
-
Save kolemannix/4e53c10666086244702e8cc567a03823 to your computer and use it in GitHub Desktop.
Timestamp-based 64-bit IDs with sharding and random elements
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.util.Random | |
object IdGeneration { | |
private[id] val OUR_EPOCH = 1546344000000L // 2019-01-01T12:00:00Z | |
private[id] val FIFTEEN_BIT_MAX: Int = 0x8000 | |
private def randomComponent15Bit(): Int = { | |
Random.nextInt(FIFTEEN_BIT_MAX) | |
} | |
private[id] val TWENTY_THREE_BIT_MAX: Int = 0x800000 | |
private def randomComponent23Bit(): Int = { | |
Random.nextInt(TWENTY_THREE_BIT_MAX) | |
} | |
private[id] val SHARD_MASK = 0x7f8000L | |
private[id] val SHARD_SHIFT = 15 | |
private[id] val TIME_MASK = 0xffffffffff800000L | |
private[id] val TIME_SHIFT = 23 | |
private[id] val FORTY_ONE_BIT_MAX = 0x20000000000L | |
private def timeComponent(timeMillis: Long): Long = { | |
(timeMillis - OUR_EPOCH) % FORTY_ONE_BIT_MAX | |
} | |
private[id] val EIGHT_BIT_MAX = 256 | |
/** Generates a timestamp-based random ID, based off the current time. 41 bits of the ID are based on the timestamp, which defaults to the current time in | |
* milliseconds The other 23 are random | |
*/ | |
def generateRandomId(): Long = { | |
generateRandomIdWithTime(System.currentTimeMillis()) | |
} | |
/** Generates a timestamp-based random ID. 41 bits of the ID are based on the timestamp, which defaults to the current time in milliseconds The other 23 are | |
* random | |
* @param timeMillis | |
* The time to use, defaults to current time | |
*/ | |
def generateRandomIdWithTime(timeMillis: Long): Long = { | |
val time = timeComponent(timeMillis) | |
val random = randomComponent23Bit() | |
// time 41 | random 23 | |
(time << TIME_SHIFT) | random | |
} | |
/** Generates a timestamp-based ID. | |
* - 41 bits of the ID are based on the timestamp, which defaults to the current time in milliseconds | |
* - 8 bits are based on the shardInput parameter, which is ideally a hashcode or identifier of some related entity or object. For example, if this ID is | |
* for a chat message, a hash of the id of the user who sent the message. This can be used to reduce collisions, since no user will chat thousands of | |
* times per millisecond. | |
* - 15 remaining bits are random | |
*/ | |
def generateShardedIdWithTime(shardInput: Int, timeMillis: Long): Long = { | |
val time = timeComponent(timeMillis) | |
val shard = Math.abs(shardInput) % EIGHT_BIT_MAX | |
val random = randomComponent15Bit() | |
// time 41 | shard 8 | random 15 | |
(time << TIME_SHIFT) | (shard << SHARD_SHIFT) | random | |
} | |
/** Generates a timestamp-based ID. | |
* - 41 bits of the ID are based on the timestamp, which defaults to the current time in milliseconds | |
* - 8 bits are based on the shardInput parameter, which is ideally a hashcode or identifier of some related entity or object | |
* - 15 remaining bits are random | |
*/ | |
def generateShardedId(shardInput: Int): Long = { | |
generateShardedIdWithTime(shardInput, System.currentTimeMillis()) | |
} | |
} | |
object Base62Long { | |
final val GMP_ALPHABET_62: Array[Char] = Array( | |
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', | |
'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', | |
) | |
final val GMP_ALPHABET_62_MAP: Map[Char, Int] = GMP_ALPHABET_62.zipWithIndex.toMap | |
private final val POWS_62: Map[Int, Long] = Map( | |
(0, 1L), | |
(1, 62L), | |
(2, 3844L), | |
(3, 238328L), | |
(4, 14776336L), | |
(5, 916132832L), | |
(6, 56800235584L), | |
(7, 3521614606208L), | |
(8, 218340105584896L), | |
(9, 13537086546263552L), | |
(10, 839299365868340224L), | |
) | |
/** Encodes the given value, interpreted as an unsigned 64-bit integer, to a base62 String representation | |
*/ | |
def encode(input: Long): String = { | |
if (input == 0L) { | |
return "0" | |
} | |
var remaining = input | |
// The longest these will ever be is 11 characters (the encoding of 0xFFFFFFFFFFFFFFFF: LygHa16AHYF) | |
val buffer = new StringBuffer(16) | |
var iterations = 0 | |
while (remaining != 0) { | |
val quotient = java.lang.Long.divideUnsigned(remaining, 62) | |
val remainder = java.lang.Long.remainderUnsigned(remaining, 62) | |
val character = GMP_ALPHABET_62(remainder.toInt) | |
buffer.append(character) | |
remaining = quotient | |
// Guard against (mathematically impossible) infinite loops | |
if (iterations > 1000) { | |
throw new Exception("Detected infinite loop in Base62Long.encode") | |
} | |
iterations = iterations + 1 | |
} | |
buffer.reverse.toString | |
} | |
/** Decodes the given value, assumed to be a String of characters from the GMP Base62 alphabet, back to its 64-bit long reperesentation | |
*/ | |
def decode(input: String): Long = { | |
if (input.length > 11) { | |
throw new IllegalArgumentException(s"Too large input: ${input}") | |
} | |
var result = 0L | |
var digit = 0 | |
input.reverseIterator.foreach { c => | |
val index = | |
try { GMP_ALPHABET_62_MAP(c) } | |
catch { | |
case _: NoSuchElementException => throw new IllegalArgumentException(s"Illegal base62 input: ${input}") | |
} | |
val exp = | |
try { POWS_62(digit) } | |
catch { | |
case _: NoSuchElementException => throw new IllegalArgumentException(s"Too large input: ${input}") | |
} | |
val added = index * exp | |
result += added | |
digit = digit + 1 | |
} | |
result | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment