Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
A simple implementation of the Boyer-Moore string search algorithm for use with node.js' Buffer objects.

What?!

A simple implementation of the Boyer-Moore string search algorithm for use with node.js' Buffer objects.

Why?

I needed this functionality in one of my projects. I started to write a search algorithm right away, until (after a few lines) I realized that it's absolutely unneccessary to do so - because others must have done this before (this "reinventing the wheel" thing, you know).

So I DuckDuckGoed (they really need a new name for that search engine, by the way) around the interwebs to find a suitable approach to my problem.

I immediately stumbled over the Boyer-Moore string search algorithm. Since I didn't know anything about how effective string search is done (shame on me), I just started to write a ripoff from the Java example given in the Wikipedia article to get started learning about it.

That's why.

Usage

var moore = require( './buffer-moore' );

var index = moore.indexOf(
  // the needle
  new Buffer( 'find me' ),
  // the haystack
  new Buffer( 'Some garbage text and the `find me` sequence.' )
);
var horspool = require( './buffer-horspool' )

var haystack = new Buffer( 'Some garbage text and the `find me` sequence.' )
var needle = new Buffer( 'find me' )

var index = horspool( haystack, needle )
function boyerMooreHorspool( haystack, needle, start ) {
var nlen = needle.length
var hlen = haystack.length
if( nlen <= 0 || hlen <= 0 )
return -1
var jump, offset = start || 0
var scan = 0
var last = nlen - 1
var skip = {}
for( scan = 0; scan < last; scan++ ) {
skip[ needle[ scan ] ] = last - scan
}
while( hlen >= nlen ) {
for( scan = last; haystack[ offset + scan ] === needle[ scan ]; scan-- ) {
if( scan === 0 ) { return offset }
}
jump = skip[ haystack[ offset + last ] ]
jump = jump != null ? jump : nlen
hlen -= jump
offset += jump
}
return -1
}
// Exports
module.exports = boyerMooreHorspool
'use strict'
module.exports = {
alphabetSize: 256,
/*
Returns the index of the first occurence of
the `needle` buffer within the `haystack` buffer.
@param {Buffer} needle
@param {Buffer} haystack
@return {Integer}
*/
indexOf: function( needle, haystack ) {
var i, k;
var n = needle.length;
var m = haystack.length;
if( n === 0 ) return n;
var charTable = this.makeCharTable( needle );
var offsetTable = this.makeOffsetTable( needle );
for( i = n - 1; i < m; ) {
for( k = n - 1; needle[k] === haystack[i]; --i, --k ) {
if( k === 0 ) return i;
}
// i += n - k; // for naive method
i += Math.max( offsetTable[ n - 1 - k ], charTable[ haystack[i] ] );
}
return -1;
},
/*
Makes the jump table based on the
mismatched character information.
@param {Buffer} needle
@return {Buffer}
*/
makeCharTable: function( needle ) {
var table = new Uint32Array( this.alphabetSize );
var n = needle.length;
var t = table.length;
var i = 0;
for( ; i < t; ++i ) {
table[i] = n;
}
n--;
for( i = 0; i < n; ++i ) {
table[ needle[i] ] = n - i;
}
return table;
},
/*
Makes the jump table based on the
scan offset which mismatch occurs.
@param {Buffer} needle
*/
makeOffsetTable: function( needle ) {
var i, suffix;
var n = needle.length;
var m = n - 1;
var lastPrefix = n;
var table = new Uint32Array( n );
for( i = m; i >= 0; --i ) {
if( this.isPrefix( needle, i + 1 ) ) {
lastPrefix = i + 1;
}
table[ m - i ] = lastPrefix - i + m;
}
for( i = 0; i < n; ++i ) {
suffix = this.suffixLength( needle, i );
table[ suffix ] = m - i + suffix;
}
return table;
},
/*
Is `needle[i:end]` a prefix of `needle`?
@param {Buffer} needle
@param {Integer} i
*/
isPrefix: function( needle, i ) {
var k = 0;
var n = needle.length;
for( ; i < n; ++i, ++k ) {
if( needle[i] !== needle[k] ) {
return false;
}
}
return true;
},
/*
Returns the maximum length of the
substring ends at `i` and is a suffix.
@param {Buffer} needle
@param {Integer} i
*/
suffixLength: function( needle, i ) {
var k = 0;
var n = needle.length;
var m = n - 1;
for( ; i >= 0 && needle[i] === needle[m]; --i, --m ) {
k += 1;
}
return k;
}
};
@adityatj

Can I use a plain string instead of Buffer for using this script on client side?

@jhermsmeier
Owner

@adityatj you should just be able to use String.prototype.indexOf:

var haystack = 'Hello shiny planet'
haystack.indexOf('shiny') // => 6
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment