Last active
February 24, 2018 00:53
-
-
Save qti3e/60ef2ea4f1f75e85d14f3944c1e02288 to your computer and use it in GitHub Desktop.
Show-case: Redis as a simple indexing backend in Node.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// @flow | |
import Redis from 'redis'; | |
import bluebird from 'bluebird'; | |
import type {RedisClient} from 'redis'; | |
import uuid from 'uuid/v4'; | |
import md5 from 'md5'; | |
bluebird.promisifyAll(Redis.RedisClient.prototype); | |
const SearchClient: RedisClient = Redis.createClient({ | |
db: 2 | |
}); | |
// ------------------------------------------------------------------------------------------------ | |
// indexing and searching algorithms :) | |
type IndexType = {[string]: number}; | |
const indexLen = 3; | |
const maxResults = 500; | |
const regexNonLetters = /[!@#\$%\^&\(\)-=_\+\s\[\]`~,./<>\?{}\\\|'"]/gi; | |
const regexIsNumeric = /^\d+$/ig; | |
function isNumeric(number: any) { | |
if(typeof number === 'number') { | |
return true; | |
} | |
if(typeof number === 'string') { | |
if(regexIsNumeric.test(number)) { | |
return true; | |
} | |
} | |
return false; | |
} | |
function getIndexes(content: string): IndexType { | |
content = content.replace(regexNonLetters, '').toLowerCase(); | |
const indexes: IndexType = {}; | |
const length: number = Math.floor(content.length / indexLen); | |
for(let row = 0; row < indexLen; row += 1) { | |
for(let i = 0; i < length; i += 1) { | |
let word = content.substr(i * indexLen + row, indexLen); | |
if(word.length !== indexLen) { | |
continue; | |
} | |
if(!indexes[word]) { | |
indexes[word] = 0; | |
} | |
indexes[word] += 1; | |
} | |
} | |
return indexes; | |
} | |
function toFiltersRecursive( | |
namespace: string | Array<string>, | |
obj: any | |
): Array<[string, number | string]> { | |
if(typeof namespace === 'string') { | |
namespace = [namespace]; | |
} | |
const filters: Array<[string, number | string]> = []; | |
const keys: Array<string> = Object.keys(obj); | |
while(keys.length) { | |
const key: string = keys.pop(); | |
const pointer: string = namespace.join('.') + '.' + key; | |
const value = obj[key]; | |
if(isNumeric(value)) { | |
filters.push([pointer, Number(value)]); | |
continue; | |
} | |
if(typeof value === 'string') { | |
filters.push([pointer, value]); | |
continue; | |
} | |
if(typeof value === 'object') { | |
const subFilters = toFiltersRecursive(namespace.concat(key), value); | |
while(subFilters.length) { | |
filters.push(subFilters.pop()); | |
} | |
} | |
} | |
return filters; | |
} | |
async function simpleSearch(pointer: string, value: string | number): Promise<string> { | |
if(typeof value === 'number') { | |
return pointer + ':' + value; | |
} | |
const resultsUUID: string = uuid(); | |
const keysToUnion: Array<string> = []; | |
const indexes: IndexType = getIndexes(value); | |
const keys: Array<string> = Object.keys(indexes); | |
while(keys.length) { | |
const word = keys.pop(); | |
keysToUnion.push(pointer + ':' + word); | |
} | |
await SearchClient.zunionstoreAsync(resultsUUID, keysToUnion.length, ...keysToUnion); | |
await SearchClient.zremrangebyrankAsync(resultsUUID, 0, -(maxResults + 1)); | |
await SearchClient.expireAsync(resultsUUID, 30); | |
return resultsUUID; | |
} | |
export async function search( | |
namespace: string, | |
filters: any | |
): Promise<string> { | |
filters = toFiltersRecursive(namespace, filters); | |
const cacheKey = md5(JSON.stringify(filters)); | |
if(await SearchClient.existsAsync(cacheKey)) { | |
await SearchClient.expireAsync(cacheKey, 18e2); | |
return cacheKey; | |
} | |
const keysToIntersect: Array<string> = []; | |
const resultsUUID = cacheKey; | |
while(filters.length) { | |
const [pointer, value] = filters.pop(); | |
keysToIntersect.push(await simpleSearch(pointer, value)); | |
} | |
await SearchClient.zinterstoreAsync(resultsUUID, keysToIntersect.length, ...keysToIntersect); | |
await SearchClient.zremrangebyrankAsync(resultsUUID, 0, -(maxResults + 1)); | |
await SearchClient.expireAsync(resultsUUID, 18e2); | |
return resultsUUID; | |
} | |
export async function index( | |
namespace: string | Array<string>, | |
primaryKey: string, | |
obj: any, | |
skip: Array<string> = [] | |
): Promise<void> { | |
if(typeof namespace === 'string') { | |
namespace = [namespace]; | |
} | |
let parent = ''; | |
if(namespace.length > 1) { | |
parent = namespace.slice(1).join('.') + '.'; | |
} | |
if(typeof obj === 'object') { | |
for(let key in obj) { | |
// skip whatever specifed in `skip` arg. | |
if(skip.indexOf(parent + key) > -1) { | |
continue; | |
} | |
const pointer = namespace.join('.') + '.' + key; | |
const value = obj[key]; | |
if(typeof value === 'object') { | |
index(namespace.concat(key), primaryKey, value, skip); | |
continue; | |
} | |
if(isNumeric(value)) { | |
SearchClient.zaddAsync(pointer + ':' + value, 50, primaryKey); | |
// also it enables us to search between values in the future | |
SearchClient.zaddAsync(pointer + '::n', Number(value), primaryKey); | |
continue; | |
} | |
// anything rather than string is not supported | |
if(typeof value !== 'string') { | |
continue; | |
} | |
const indexes: IndexType = getIndexes(value); | |
const keys: Array<string> = Object.keys(indexes); | |
while(keys.length) { | |
const word = keys.pop(); | |
SearchClient.zaddAsync(pointer + ':' + word, indexes[word], primaryKey); | |
} | |
} | |
} | |
} | |
export async function results( | |
searchId: string, | |
page: number = 1, | |
perPage: number = 10 | |
): Promise<[number, Array<string>]> { | |
page -= 1; | |
const primaryKeys: Array<string> = await SearchClient.zrevrangeAsync( | |
searchId, | |
page * perPage, | |
(page + 1) * perPage - 1 | |
); | |
const count = await SearchClient.zcountAsync(searchId, '-inf', '+inf'); | |
return [count, primaryKeys]; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
concept: namespace
Imagine a heap that each object belongs to space,
namespace
represent that spaceFor example, we might have those namespaces in a project:
products
,users
,...How to use?
Indexing
The above code index the
userObj
which is an object containing our user's information and sets the primary key to it'suuid
, also prevents from indexing uuid and password of user.Creating a search space
To do a search we have to run some code like this:
The code above generates a new search space which we can use for half an hour using it's id.
Fetching data
This code tries to fetch first page of results: