Skip to content

Instantly share code, notes, and snippets.

@Salakar
Last active July 9, 2019 15:26
Show Gist options
  • Save Salakar/74027f8a6e50996fbb2dc75f82b9a60d to your computer and use it in GitHub Desktop.
Save Salakar/74027f8a6e50996fbb2dc75f82b9a60d to your computer and use it in GitHub Desktop.
Shard Firebase RTDB by `userId` or any arbitrary string/identifier. Uses a 16 bit cyclic redundancy check to calculate the users shard number based on their user id string. This negates the need for a master instance that maps where users data is stored. See comment below code block for more details.
const lookup = [
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0
];
/**
* Convert a string to a UTF8 array - faster than via buffer / no buffer in RN
* @param str
* @returns {Array}
*/
function toUTF8Array(str) {
let char;
let i = 0;
let p = 0;
const utf8 = [];
const len = str.length;
for (; i < len; i++) {
char = str.charCodeAt(i);
if (char < 128) {
utf8[p++] = char;
} else if (char < 2048) {
utf8[p++] = (char >> 6) | 192;
utf8[p++] = (char & 63) | 128;
} else if (
((char & 0xFC00) === 0xD800) && (i + 1) < str.length &&
((str.charCodeAt(i + 1) & 0xFC00) === 0xDC00)) {
char = 0x10000 + ((char & 0x03FF) << 10) + (str.charCodeAt(++i) & 0x03FF);
utf8[p++] = (char >> 18) | 240;
utf8[p++] = ((char >> 12) & 63) | 128;
utf8[p++] = ((char >> 6) & 63) | 128;
utf8[p++] = (char & 63) | 128;
} else {
utf8[p++] = (char >> 12) | 224;
utf8[p++] = ((char >> 6) & 63) | 128;
utf8[p++] = (char & 63) | 128;
}
}
return utf8;
}
/**
* Convert a string into a CRC16 hash.
* @param str
* @returns {number}
*/
function generate(str) {
let char;
let i = 0;
let start = -1;
let result = 0;
let resultHash = 0;
const utf8 = toUTF8Array(str);
const len = utf8.length;
while (i < len) {
char = utf8[i++];
if (start === -1) {
if (char === 0x7B) {
start = i;
}
} else if (char !== 0x7D) {
resultHash = lookup[(char ^ (resultHash >> 8)) & 0xFF] ^ (resultHash << 8);
} else if (i - 1 !== start) {
return resultHash & 0x3FFF;
}
result = lookup[(char ^ (result >> 8)) & 0xFF] ^ (result << 8);
}
return result & 0x3FFF;
}
/**
* Get the shard number for a generated slot.
*
* @param slot
* @param shards
* @returns {number}
*/
function shardForSlot(slot, shards = 10) {
const slotSize = Math.round(16383 / shards);
return Math.ceil(slot / slotSize);
}
// testing using user ids
console.log(shardForSlot(generate('008ZlXmlSoR5JOUw9asn2uvfDnH2')));
console.log(shardForSlot(generate('00I0dU5eCjRsq5OsNEiR7F6Z81A3')));
console.log(shardForSlot(generate('010imbVXHJfmonoC0Mz6wGInsys1')));
console.log(shardForSlot(generate('016xRLmbTJhCLE8038hRQ6BzrlC2')));
console.log(shardForSlot(generate('01Ncn9PWMXXm6oc8uVNPZiFyJOE2')));
console.log(shardForSlot(generate('01T7yi9SJkMyjl5PmMMlqykwUd43')));
console.log(shardForSlot(generate('02771fjg81dCe9VwRyB9zEf5UMw2')));
console.log(shardForSlot(generate('02GtQG8tI9ORjadgVqQZ5POkotO2')));
console.log(shardForSlot(generate('zzpv0R09UnTgc5MjiMMMIQ1SHRF2')));
console.log(shardForSlot(generate('14ZxIeKAKHfdeW4LYkRYTJoRwPC2')));
console.log(shardForSlot(generate('zeEwnZaYYBP7XvEjrdEgMnB5J092')));
console.log(shardForSlot(generate('SIEntLUOUec5VsW7AuyV8hnOtVF2')));
console.log(shardForSlot(generate('tqJJIowPQgQx9TA2P1hqNI39Mnl1')));
console.log(shardForSlot(generate('MhBFF5R8XfdCPgoxaHaOzRP2KY33')));
console.log(shardForSlot(generate('47kKFge003VhEInw4YxJFkbPaiI3')));
console.log(shardForSlot(generate('JdaheYIX0IXGeVVwSl369uIUTCh2')));
console.log(shardForSlot(generate('kAn7T2EW0UaX88R1oFVoDBNHWnt1')));
console.log(shardForSlot(generate('FYNwIewg48aCSbEVRmImnhERgbG3')));
console.log(shardForSlot(generate('customUserId12byIFtTMURT')));
// these will all go to the same shard id
console.log(shardForSlot(generate('{foo}AoLwrQMawBfyCZ0Yp44y6h5I8nB2')));
console.log(shardForSlot(generate('{foo}mfipaYNDDyfJqzYSvy0TJYQDkN53')));
console.log(shardForSlot(generate('{foo}ZGxjo5cjLUQOI56kJyOiOm9aGDF3')));
// OUTPUT
/*
8
9
9
9
1
4
2
7
3
10
4
6
9
7
3
8
1
1
3
// same shard because of the tag 'foo':
8
8
8
*/
// e.g. my-rtdb-name${slot} - my-rtdb-name10
@Salakar
Copy link
Author

Salakar commented Nov 20, 2018

The same can be done for document ids etc.

Additionally, you can tag where specific data goes, for example, if you had a bunch of data relating to a chat group and wanted all the messages for that chat group on the same shard you can tag your string that you use to generate the shard instance number, e.g. {im-a-tag}message1234 & {im-a-tag}message5678 will both return the same shard number as its calculated based on the tag only (im-a-tag in this example, but it could also be the chat group id).


Code taken from my https://github.com/Salakar/cluster-key-slot npm package - which is used by both of the official Redis clients for Node.js - ioredis & redis packages on NPM.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment