Skip to content

Instantly share code, notes, and snippets.

@stukennedy
Created November 12, 2016 00:47
Show Gist options
  • Save stukennedy/2f1b6963b504e92013574ac04983a943 to your computer and use it in GitHub Desktop.
Save stukennedy/2f1b6963b504e92013574ac04983a943 to your computer and use it in GitHub Desktop.
A simple implementation of the Boyer-Moore string search algorithm for use with node.js' Buffer objects.

What?!

A simple implementation of the Boyer-Moore string search algorithm for use with node.js' Buffer objects.

Why?

I needed this functionality in one of my projects. I started to write a search algorithm right away, until (after a few lines) I realized that it's absolutely unneccessary to do so - because others must have done this before (this "reinventing the wheel" thing, you know).

So I DuckDuckGoed (they really need a new name for that search engine, by the way) around the interwebs to find a suitable approach to my problem.

I immediately stumbled over the Boyer-Moore string search algorithm. Since I didn't know anything about how effective string search is done (shame on me), I just started to write a ripoff from the Java example given in the Wikipedia article to get started learning about it.

That's why.

Usage

var moore = require( './moore' );

var index = moore.indexOf(
  // the needle
  new Buffer( 'find me' ),
  // the haystack
  new Buffer( 'Some garbage text and the `find me` sequence.' )
);
'use strict'
module.exports = {
alphabetSize: 256,
/*
Returns the index of the first occurence of
the `needle` buffer within the `haystack` buffer.
@param {Buffer} needle
@param {Buffer} haystack
@return {Integer}
*/
indexOf: function( needle, haystack ) {
var i, k;
var n = needle.length;
var m = haystack.length;
if( n === 0 ) return n;
var charTable = this.makeCharTable( needle );
var offsetTable = this.makeOffsetTable( needle );
for( i = n - 1; i < m; ) {
for( k = n - 1; needle[k] === haystack[i]; --i, --k ) {
if( k === 0 ) return i;
}
// i += n - k; // for naive method
i += Math.max( offsetTable[ n - 1 - k ], charTable[ haystack[i] ] );
}
return -1;
},
/*
Makes the jump table based on the
mismatched character information.
@param {Buffer} needle
@return {Buffer}
*/
makeCharTable: function( needle ) {
var table = new Uint32Array( this.alphabetSize );
var n = needle.length;
var t = table.length;
var i = 0;
for( ; i < t; ++i ) {
table[i] = n;
}
n--;
for( i = 0; i < n; ++i ) {
table[ needle[i] ] = n - i;
}
return table;
},
/*
Makes the jump table based on the
scan offset which mismatch occurs.
@param {Buffer} needle
*/
makeOffsetTable: function( needle ) {
var i, suffix;
var n = needle.length;
var m = n - 1;
var lastPrefix = n;
var table = new Uint32Array( n );
for( i = m; i >= 0; --i ) {
if( this.isPrefix( needle, i + 1 ) ) {
lastPrefix = i + 1;
}
table[ m - i ] = lastPrefix - i + m;
}
for( i = 0; i < n; ++i ) {
suffix = this.suffixLength( needle, i );
table[ suffix ] = m - i + suffix;
}
return table;
},
/*
Is `needle[i:end]` a prefix of `needle`?
@param {Buffer} needle
@param {Integer} i
*/
isPrefix: function( needle, i ) {
var k = 0;
var n = needle.length;
for( ; i < n; ++i, ++k ) {
if( needle[i] !== needle[k] ) {
return false;
}
}
return true;
},
/*
Returns the maximum length of the
substring ends at `i` and is a suffix.
@param {Buffer} needle
@param {Integer} i
*/
suffixLength: function( needle, i ) {
var k = 0;
var n = needle.length;
var m = n - 1;
for( ; i >= 0 && needle[i] === needle[m]; --i, --m ) {
k += 1;
}
return k;
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment