Burrows-Wheeler Transform; BWT, Move-to-front transform
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Burrows-Wheeler Transform; BWT | |
// http://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform | |
// http://www.geocities.jp/m_hiroi/light/pyalgo48.html | |
// | |
var _ = require('underscore'); | |
function BWT(){} | |
BWT.encode = function(data) { | |
var size = data.length; | |
var buff = data + data; | |
var idx = _.range(size).sort(function(x, y){ | |
for (var i = 0; i < size; i++) { | |
var r = buff[x + i].charCodeAt() - buff[y + i].charCodeAt(); | |
if (r !== 0) return r; | |
} | |
return 0; | |
}); | |
var top; | |
var work = _.reduce(_.range(size), function(memo, k){ | |
var p = idx[k]; | |
if (p === 0) top = k; | |
memo.push(buff[p + size - 1]); | |
return memo; | |
}, []).join(''); | |
return { top: top, data: work }; | |
}; | |
BWT.decode = function(top, data) { | |
var size = data.length; | |
var idx = _.range(size).sort(function(x, y){ | |
var c = data[x].charCodeAt() - data[y].charCodeAt(); | |
if (c === 0) return x - y; | |
return c; | |
}); | |
var p = idx[top]; | |
return _.reduce(_.range(size), function(memo){ | |
memo.push(data[p]); | |
p = idx[p]; | |
return memo; | |
}, []).join(''); | |
}; | |
module.exports = BWT; | |
// var input = 'abacadaeafagahaiajakalaman'; | |
// console.log(input); | |
// | |
// var o = encode(input); | |
// console.log(o); | |
// console.log(o.data === 'nbcdefghijklmaaaaaaaaaaaaa' ? 'true' : 'false'); | |
// | |
// var o = decode(o.top, o.data); | |
// console.log(o); | |
// console.log(o === input ? 'true' : 'false'); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Move-to-Front; MTF | |
// http://en.wikipedia.org/wiki/Move-to-front_transform | |
// http://www.geocities.jp/m_hiroi/light/pyalgo48.html | |
// | |
var _ = require('underscore'); | |
var BWT = require('./bwt.js'); | |
function MTF(){} | |
MTF.encode = function(data) { | |
var table = _.range(256); | |
var res = []; | |
for (var i = 0; i < data.length; i++) { | |
var code = data[i].charCodeAt(); | |
var idx = table.indexOf(code); | |
res.push(idx); | |
if (idx !== 0) { | |
table.splice(idx, 1); | |
table.unshift(code); | |
} | |
} | |
return res; | |
}; | |
MTF.decode = function(data) { | |
var table = _.range(256); | |
var res = []; | |
for (var i = 0; i < data.length; i++) { | |
var c = table[data[i]]; | |
if (data[i] > 0) { | |
table.splice(data[i], 1); | |
table.unshift(c); | |
} | |
res.push(String.fromCharCode(c)); | |
} | |
return res.join(''); | |
}; | |
module.exports = MTF; | |
// | |
// var input, output, expect; | |
// | |
// input = 'abacadaeafagahaiajakalaman'; | |
// var bwt = BWT.encode(input); | |
// console.log('input:', input); | |
// console.log('BWT encode:', bwt.data); | |
// output = MTF.encode(bwt.data); | |
// | |
// expect = [110, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; | |
// console.log('MTF encode:', output.toString()); | |
// console.log(output.toString() === expect.toString() ? 'ok' : 'failed'); | |
// | |
// output = MTF.decode(output); | |
// console.log('MTF decode:', output); | |
// console.log(output === bwt.data ? 'ok' : 'failed'); | |
// | |
// output = BWT.decode(bwt.top, output); | |
// console.log('BWT decode:', output); | |
// console.log(output === input ? 'ok' : 'failed'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment