Skip to content

Instantly share code, notes, and snippets.

@avovsya
Created May 29, 2014 13:39
Show Gist options
  • Save avovsya/e23aaea3e8364934a58b to your computer and use it in GitHub Desktop.
Save avovsya/e23aaea3e8364934a58b to your computer and use it in GitHub Desktop.
Helper for working with Cassandra's inverted index tables. Allows partitioning an index key across multiple row keys.
"use strict";
var slug = require('slug');
var hashFunctions = {
    /**
     * Return a two-letter hash based on the first two symbols of a string.
     *
     * The string is passed through `slug`, lower-cased, and stripped of a
     * leading "www." / "www<digits>." prefix. Every character that is not
     * a-z is then folded to 'z', and the result is padded with 'z' so the
     * hash is always exactly two letters. This guarantees that the hash is
     * one of the values listed by `two_symbol_hash_range`.
     * @param {string} str String to calculate the hash for
     * @return {string} Two lowercase letters, e.g. 'ab'
     */
    "two_symbol_hash": function (str) {
        var normalized = slug(str)
            .toLowerCase()
            .replace(/^www\d*\./, '')
            // The output alphabet of `slug` is not visible from this file, so
            // fold everything outside a-z (not only dots, digits and dashes)
            // to 'z'; otherwise the hash could fall outside the lookup range.
            .replace(/[^a-z]/g, 'z');
        // Pad so that empty and one-character inputs still give two letters.
        return (normalized + 'zz').slice(0, 2);
    },
    /**
     * Return every value `two_symbol_hash` can produce: 'aa', 'ab', ... 'zz'.
     *
     * The list is built once and cached on the function object; every call
     * returns the same array instance, so callers must not modify it.
     * @return {Array} Array of 676 two-letter strings in alphabetical order
     */
    "two_symbol_hash_range": function twoSymbolHashRange() {
        if (twoSymbolHashRange.result) {
            return twoSymbolHashRange.result;
        }
        var firstCode = 'a'.charCodeAt(0);
        var lastCode = 'z'.charCodeAt(0);
        var results = [];
        for (var ch = firstCode; ch <= lastCode; ch++) {
            for (var sch = firstCode; sch <= lastCode; sch++) {
                results.push(String.fromCharCode(ch, sch));
            }
        }
        twoSymbolHashRange.result = results;
        return results;
    }
};
/**
 * Return a row key for an index table in Cassandra, based on a value.
 *
 * The key is `keyPrefix`, an underscore, and the hash of `value` as
 * calculated by the selected entry of `hashFunctions`.
 * @param {string} keyPrefix Key prefix. e.g. 'some_tag', 'another_tag', etc.
 * @param {string} value Value to put on that key; it is passed to the hash
 * function
 * @param {string} [hashFunction='two_symbol_hash'] Name of the hash function
 * to use
 * @return {string} Partition key to use in cassandra index table. e.g.
 * 'some_tag_ab', 'another_tag_jj', etc.
 * @throws {TypeError} If `hashFunction` does not name a function defined in
 * `hashFunctions`
 */
function createPartitionKey(keyPrefix, value, hashFunction) {
    var hashName = hashFunction || 'two_symbol_hash';
    // Own-property check so that inherited names such as 'toString' are
    // rejected instead of being called as if they were hash functions.
    var isKnown = Object.prototype.hasOwnProperty.call(hashFunctions, hashName) &&
        typeof hashFunctions[hashName] === 'function';
    if (!isKnown) {
        throw new TypeError('Unknown hash function: ' + hashName);
    }
    return keyPrefix + '_' + hashFunctions[hashName](value);
}
/**
 * Get the key range to look up in a Cassandra index table for an index key.
 *
 * Each returned element is a ready-to-embed CQL string literal: the key is
 * wrapped in single quotes, and single quotes inside `keyPrefix` are doubled.
 * @param {string} keyPrefix Index key to build the range for. 'some_tag',
 * 'another_tag', etc
 * @param {string} [hashFunction='two_symbol_hash'] Name of the hash function;
 * the range comes from the `hashFunctions` entry named `<hashFunction>_range`
 * @return {Array} Array of quoted keys to look up in cassandra. e.g.
 * ["'some_tag_aa'", "'some_tag_ab'", "'some_tag_ac'"...]
 * @throws {TypeError} If `hashFunctions` has no range function for
 * `hashFunction`
 */
function getPartiotionKeyRange(keyPrefix, hashFunction) {
    var rangeName = (hashFunction || 'two_symbol_hash') + '_range';
    var isKnown = Object.prototype.hasOwnProperty.call(hashFunctions, rangeName) &&
        typeof hashFunctions[rangeName] === 'function';
    if (!isKnown) {
        throw new TypeError('Unknown hash function range: ' + rangeName);
    }
    // CQL escapes a single quote inside a string literal by doubling it;
    // without this a prefix containing "'" would terminate the literal early.
    var escapedPrefix = String(keyPrefix).replace(/'/g, "''");
    return hashFunctions[rangeName]().map(function (hashKey) {
        return "'" + escapedPrefix + '_' + hashKey + "'";
    });
}
/**
 * Build the query that fetches the values stored for a key in an inverted
 * index.
 *
 * The query is a single SELECT whose IN clause lists every partition key
 * returned by `getPartiotionKeyRange` for `key`.
 * @param {string} indexName Name of the table to query; inserted into the
 * query text as-is
 * @param {string} key Name of the key to get from the index
 * @param {string} [hashFunction='two_symbol_hash'] Name of the hash function
 * used for partitioning keys in the index
 * @return {string} Query to get key values from the inverted index
 */
function getFromInvertedIndexQuery(indexName, key, hashFunction) {
    var partitionKeys = getPartiotionKeyRange(key, hashFunction);
    var inClause = partitionKeys.join(',');
    return "SELECT * FROM " + indexName + " WHERE key IN (" + inClause + ");";
}
/**
 * Build the query that puts a value into an inverted index for a key.
 *
 * The row key is calculated by `createPartitionKey` from `key` and `value`.
 * Both the row key and the value are written as CQL string literals, with
 * single quotes inside them doubled.
 * @param {string} indexName Name of the table to put the value into; inserted
 * into the query text as-is, so it must come from a trusted source
 * @param {string} key Name of the key to put
 * @param {string} value Value to put into the key
 * @param {string} [hashFunction='two_symbol_hash'] Name of the hash function
 * used for partitioning keys in the index
 * @return {string} Query to put the value into the key in the inverted index
 */
function putToInvertedIndexQuery(indexName, key, value, hashFunction) {
    // CQL escapes a single quote inside a string literal by doubling it;
    // without this a value such as "O'Reilly" would produce a broken query.
    var quote = function (text) {
        return "'" + String(text).replace(/'/g, "''") + "'";
    };
    var partitionKey = createPartitionKey(key, value, hashFunction);
    return "INSERT INTO " + indexName + " (key, value) VALUES (" +
        quote(partitionKey) + ", " + quote(value) + ");";
}
// Public API: only the two query builders are exported; the hash functions
// and the partition-key helpers are not part of the exported object.
module.exports = {
getFromInvertedIndexQuery: getFromInvertedIndexQuery,
putToInvertedIndexQuery: putToInvertedIndexQuery
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment