Instantly share code, notes, and snippets.

Embed
What would you like to do?
A simple implementation of the Boyer-Moore string search algorithm for use with node.js' Buffer objects.

What?!

A simple implementation of the Boyer-Moore string search algorithm for use with node.js' Buffer objects.

Why?

I needed this functionality in one of my projects. I started to write a search algorithm right away, until (after a few lines) I realized that it's absolutely unneccessary to do so - because others must have done this before (this "reinventing the wheel" thing, you know).

So I DuckDuckGoed (they really need a new name for that search engine, by the way) around the interwebs to find a suitable approach to my problem.

I immediately stumbled over the Boyer-Moore string search algorithm. Since I didn't know anything about how efficient string search is done (shame on me), I just started to write a ripoff from the Java example given in the Wikipedia article to get started learning about it.

That's why.

Usage

var moore = require( './buffer-moore' );

var index = moore.indexOf(
  // the needle
  new Buffer( 'find me' ),
  // the haystack
  new Buffer( 'Some garbage text and the `find me` sequence.' )
);
var horspool = require( './buffer-horspool' )

var haystack = new Buffer( 'Some garbage text and the `find me` sequence.' )
var needle = new Buffer( 'find me' )

var index = horspool( haystack, needle )
function boyerMooreHorspool( haystack, needle, start ) {
var nlen = needle.length
var hlen = haystack.length
if( nlen <= 0 || hlen <= 0 )
return -1
var jump, offset = start || 0
var scan = 0
var last = nlen - 1
var skip = {}
for( scan = 0; scan < last; scan++ ) {
skip[ needle[ scan ] ] = last - scan
}
while( hlen >= nlen ) {
for( scan = last; haystack[ offset + scan ] === needle[ scan ]; scan-- ) {
if( scan === 0 ) { return offset }
}
jump = skip[ haystack[ offset + last ] ]
jump = jump != null ? jump : nlen
hlen -= jump
offset += jump
}
return -1
}
// Exports
module.exports = boyerMooreHorspool
'use strict'
module.exports = {
alphabetSize: 256,
/*
Returns the index of the first occurence of
the `needle` buffer within the `haystack` buffer.
@param {Buffer} needle
@param {Buffer} haystack
@return {Integer}
*/
indexOf: function( needle, haystack ) {
var i, k;
var n = needle.length;
var m = haystack.length;
if( n === 0 ) return n;
var charTable = this.makeCharTable( needle );
var offsetTable = this.makeOffsetTable( needle );
for( i = n - 1; i < m; ) {
for( k = n - 1; needle[k] === haystack[i]; --i, --k ) {
if( k === 0 ) return i;
}
// i += n - k; // for naive method
i += Math.max( offsetTable[ n - 1 - k ], charTable[ haystack[i] ] );
}
return -1;
},
/*
Makes the jump table based on the
mismatched character information.
@param {Buffer} needle
@return {Buffer}
*/
makeCharTable: function( needle ) {
var table = new Uint32Array( this.alphabetSize );
var n = needle.length;
var t = table.length;
var i = 0;
for( ; i < t; ++i ) {
table[i] = n;
}
n--;
for( i = 0; i < n; ++i ) {
table[ needle[i] ] = n - i;
}
return table;
},
/*
Makes the jump table based on the
scan offset which mismatch occurs.
@param {Buffer} needle
*/
makeOffsetTable: function( needle ) {
var i, suffix;
var n = needle.length;
var m = n - 1;
var lastPrefix = n;
var table = new Uint32Array( n );
for( i = m; i >= 0; --i ) {
if( this.isPrefix( needle, i + 1 ) ) {
lastPrefix = i + 1;
}
table[ m - i ] = lastPrefix - i + m;
}
for( i = 0; i < n; ++i ) {
suffix = this.suffixLength( needle, i );
table[ suffix ] = m - i + suffix;
}
return table;
},
/*
Is `needle[i:end]` a prefix of `needle`?
@param {Buffer} needle
@param {Integer} i
*/
isPrefix: function( needle, i ) {
var k = 0;
var n = needle.length;
for( ; i < n; ++i, ++k ) {
if( needle[i] !== needle[k] ) {
return false;
}
}
return true;
},
/*
Returns the maximum length of the
substring ends at `i` and is a suffix.
@param {Buffer} needle
@param {Integer} i
*/
suffixLength: function( needle, i ) {
var k = 0;
var n = needle.length;
var m = n - 1;
for( ; i >= 0 && needle[i] === needle[m]; --i, --m ) {
k += 1;
}
return k;
}
};
@adityatj

This comment has been minimized.

Show comment
Hide comment
@adityatj

adityatj Jun 23, 2016

Can I use a plain string instead of Buffer for using this script on client side?

adityatj commented Jun 23, 2016

Can I use a plain string instead of Buffer for using this script on client side?

@jhermsmeier

This comment has been minimized.

Show comment
Hide comment
@jhermsmeier

jhermsmeier Jan 18, 2017

@adityatj you should just be able to use String.prototype.indexOf:

var haystack = 'Hello shiny planet'
haystack.indexOf('shiny') // => 6
Owner

jhermsmeier commented Jan 18, 2017

@adityatj you should just be able to use String.prototype.indexOf:

var haystack = 'Hello shiny planet'
haystack.indexOf('shiny') // => 6
@mghifariy

This comment has been minimized.

Show comment
Hide comment
@mghifariy

mghifariy Apr 20, 2018

nice code. can this search for multiple pattern?

mghifariy commented Apr 20, 2018

nice code. can this search for multiple pattern?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment