Skip to content

Instantly share code, notes, and snippets.

@etrepum
Last active April 15, 2018 20:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save etrepum/6235082 to your computer and use it in GitHub Desktop.
Save etrepum/6235082 to your computer and use it in GitHub Desktop.
Boyer-Moore in JavaScript, because Uint8Array doesn't have an indexOf.
function asUint8Array(input) {
if (input instanceof Uint8Array) {
return input;
} else if (typeof(input) === 'string') {
// This naive transform only supports ASCII patterns. UTF-8 support
// not necessary for the intended use case here.
var arr = new Uint8Array(input.length);
for (var i = 0; i < input.length; i++) {
var c = input.charCodeAt(i);
if (c > 127) {
throw new TypeError("Only ASCII patterns are supported");
}
arr[i] = c;
}
return arr;
} else {
// Assume that it's already something that can be coerced.
return new Uint8Array(input);
}
}
function boyerMoore(patternBuffer) {
// Implementation of Boyer-Moore substring search ported from page 772 of
// Algorithms Fourth Edition (Sedgewick, Wayne)
// http://algs4.cs.princeton.edu/53substring/BoyerMoore.java.html
/*
USAGE:
// needle should be ASCII string, ArrayBuffer, or Uint8Array
// haystack should be an ArrayBuffer or Uint8Array
var search = boyerMoore(needle);
var skip = search.byteLength;
var indexes = [];
for (var i = search(haystack); i !== -1; i = search(haystack, i + skip)) {
indexes.push(i);
}
*/
var pattern = asUint8Array(patternBuffer);
var M = pattern.length;
if (M === 0) {
throw new TypeError("patternBuffer must be at least 1 byte long");
}
// radix
var R = 256;
var rightmost_positions = new Int32Array(R);
// position of the rightmost occurrence of the byte c in the pattern
for (var c = 0; c < R; c++) {
// -1 for bytes not in pattern
rightmost_positions[c] = -1;
}
for (var j = 0; j < M; j++) {
// rightmost position for bytes in pattern
rightmost_positions[pattern[j]] = j;
}
function boyerMooreSearch(txtBuffer, start, end) {
// Return offset of first match, -1 if no match.
var txt = asUint8Array(txtBuffer);
if (start === undefined) start = 0;
if (end === undefined) end = txt.length;
var pat = pattern;
var right = rightmost_positions;
var lastIndex = end - pat.length;
var lastPatIndex = pat.length - 1;
var skip;
for (var i = start; i <= lastIndex; i += skip) {
skip = 0;
for (var j = lastPatIndex; j >= 0; j--) {
var c = txt[i + j];
if (pat[j] !== c) {
skip = Math.max(1, j - right[c]);
break;
}
}
if (skip === 0) {
return i;
}
}
return -1;
};
boyerMooreSearch.byteLength = pattern.byteLength;
return boyerMooreSearch;
}
@Xample
Copy link

Xample commented Apr 15, 2018

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment