Skip to content

Instantly share code, notes, and snippets.

@Salakar
Last active Jul 9, 2019
Embed
What would you like to do?
Shard Firebase RTDB by `userId` or any arbitrary string/identifier. Uses a 16 bit cyclic redundancy check to calculate the users shard number based on their user id string. This negates the need for a master instance that maps where users data is stored. See comment below code block for more details.
const lookup = [
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0
];
/**
* Convert a string to a UTF8 array - faster than via buffer / no buffer in RN
* @param str
* @returns {Array}
*/
function toUTF8Array(str) {
let char;
let i = 0;
let p = 0;
const utf8 = [];
const len = str.length;
for (; i < len; i++) {
char = str.charCodeAt(i);
if (char < 128) {
utf8[p++] = char;
} else if (char < 2048) {
utf8[p++] = (char >> 6) | 192;
utf8[p++] = (char & 63) | 128;
} else if (
((char & 0xFC00) === 0xD800) && (i + 1) < str.length &&
((str.charCodeAt(i + 1) & 0xFC00) === 0xDC00)) {
char = 0x10000 + ((char & 0x03FF) << 10) + (str.charCodeAt(++i) & 0x03FF);
utf8[p++] = (char >> 18) | 240;
utf8[p++] = ((char >> 12) & 63) | 128;
utf8[p++] = ((char >> 6) & 63) | 128;
utf8[p++] = (char & 63) | 128;
} else {
utf8[p++] = (char >> 12) | 224;
utf8[p++] = ((char >> 6) & 63) | 128;
utf8[p++] = (char & 63) | 128;
}
}
return utf8;
}
/**
* Convert a string into a CRC16 hash.
* @param str
* @returns {number}
*/
function generate(str) {
let char;
let i = 0;
let start = -1;
let result = 0;
let resultHash = 0;
const utf8 = toUTF8Array(str);
const len = utf8.length;
while (i < len) {
char = utf8[i++];
if (start === -1) {
if (char === 0x7B) {
start = i;
}
} else if (char !== 0x7D) {
resultHash = lookup[(char ^ (resultHash >> 8)) & 0xFF] ^ (resultHash << 8);
} else if (i - 1 !== start) {
return resultHash & 0x3FFF;
}
result = lookup[(char ^ (result >> 8)) & 0xFF] ^ (result << 8);
}
return result & 0x3FFF;
}
/**
* Get the shard number for a generated slot.
*
* @param slot
* @param shards
* @returns {number}
*/
function shardForSlot(slot, shards = 10) {
const slotSize = Math.round(16383 / shards);
return Math.ceil(slot / slotSize);
}
// testing using user ids
console.log(shardForSlot(generate('008ZlXmlSoR5JOUw9asn2uvfDnH2')));
console.log(shardForSlot(generate('00I0dU5eCjRsq5OsNEiR7F6Z81A3')));
console.log(shardForSlot(generate('010imbVXHJfmonoC0Mz6wGInsys1')));
console.log(shardForSlot(generate('016xRLmbTJhCLE8038hRQ6BzrlC2')));
console.log(shardForSlot(generate('01Ncn9PWMXXm6oc8uVNPZiFyJOE2')));
console.log(shardForSlot(generate('01T7yi9SJkMyjl5PmMMlqykwUd43')));
console.log(shardForSlot(generate('02771fjg81dCe9VwRyB9zEf5UMw2')));
console.log(shardForSlot(generate('02GtQG8tI9ORjadgVqQZ5POkotO2')));
console.log(shardForSlot(generate('zzpv0R09UnTgc5MjiMMMIQ1SHRF2')));
console.log(shardForSlot(generate('14ZxIeKAKHfdeW4LYkRYTJoRwPC2')));
console.log(shardForSlot(generate('zeEwnZaYYBP7XvEjrdEgMnB5J092')));
console.log(shardForSlot(generate('SIEntLUOUec5VsW7AuyV8hnOtVF2')));
console.log(shardForSlot(generate('tqJJIowPQgQx9TA2P1hqNI39Mnl1')));
console.log(shardForSlot(generate('MhBFF5R8XfdCPgoxaHaOzRP2KY33')));
console.log(shardForSlot(generate('47kKFge003VhEInw4YxJFkbPaiI3')));
console.log(shardForSlot(generate('JdaheYIX0IXGeVVwSl369uIUTCh2')));
console.log(shardForSlot(generate('kAn7T2EW0UaX88R1oFVoDBNHWnt1')));
console.log(shardForSlot(generate('FYNwIewg48aCSbEVRmImnhERgbG3')));
console.log(shardForSlot(generate('customUserId12byIFtTMURT')));
// these will all go to the same shard id
console.log(shardForSlot(generate('{foo}AoLwrQMawBfyCZ0Yp44y6h5I8nB2')));
console.log(shardForSlot(generate('{foo}mfipaYNDDyfJqzYSvy0TJYQDkN53')));
console.log(shardForSlot(generate('{foo}ZGxjo5cjLUQOI56kJyOiOm9aGDF3')));
// OUTPUT
/*
8
9
9
9
1
4
2
7
3
10
4
6
9
7
3
8
1
1
3
// same shard because of the tag 'foo':
8
8
8
*/
// e.g. my-rtdb-name${slot} - my-rtdb-name10
@Salakar

This comment has been minimized.

Copy link
Owner Author

@Salakar Salakar commented Nov 20, 2018

The same can be done for document ids etc.

Additionally, you can tag where specific data goes, for example, if you had a bunch of data relating to a chat group and wanted all the messages for that chat group on the same shard you can tag your string that you use to generate the shard instance number, e.g. {im-a-tag}message1234 & {im-a-tag}message5678 will both return the same shard number as its calculated based on the tag only (im-a-tag in this example, but it could also be the chat group id).


Code taken from my https://github.com/Salakar/cluster-key-slot npm package - which is used by both of the official Redis clients for Node.js - ioredis & redis packages on NPM.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment