Last active
August 29, 2015 14:11
-
-
Save jabney/fff24f5b187c5ad458a4 to your computer and use it in GitHub Desktop.
A toy implementation of Huffman compression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// huffman.js © 2014 James Abney http://github.com/jabney | |
// Huffman coding was invented by David A. Huffman of MIT, 1952. | |
(function() { | |
'use strict'; | |
// A toy implementation of Huffman compression. The result of | |
// encoding is a bit string. In this form it's not truly compressed, | |
// but the bit string represents the binary values after compression. | |
function huffman(str) { | |
var ft = frequencyTable(str), ht = huffTree(ft); | |
return { | |
// Encode a string into its compressed, binary representation. | |
encode: function() { | |
var bits = ''; | |
str.split('').forEach(function(c) { | |
var node = ft[c], enc = []; | |
// Start at a leaf (character) node and trace up to the root. | |
while (node.parent) { | |
// Right nodes get a '1' and left get '0'; | |
enc.push(node === node.parent.right ? '1' : '0'); | |
node = node.parent; | |
} | |
// Add the symbol's bits to the output. | |
bits += enc.reverse().join(''); | |
}); | |
return bits; | |
}, | |
// Decode a bit string into its original text form. | |
decode: function(code) { | |
var a = code.split('').reverse(), node, str = ''; | |
while(a.length) { | |
// Start at root and swallow each bit to navigate the tree. | |
node = ht; | |
while(node.chr === null) | |
// Find right or left path through the tree. | |
node = find(a.pop(), node); | |
str += node.chr; | |
} | |
return str; | |
} | |
}; | |
// Return a node to be added to the tree. | |
function node(chr, freq, right, left) { | |
return { chr:chr, freq:freq, right:right, left:left }; | |
} | |
// Return a table of frequencies based on the string. | |
function frequencyTable(str) { | |
var h = Object.create(null), k, len = str.length; | |
str.split('').forEach(function(c) { | |
h[c] && h[c].freq++ || (h[c] = node(c, 1)); | |
}); | |
return h; | |
} | |
// Build the tree that enables Huffman compression. | |
// When the algorithm finishes, the node queue will have | |
// a single remaining element: the root of the tree. | |
function huffTree(ft) { | |
var a, b, n, pq = nodeQueue(ft); | |
while(pq.size() > 1) { | |
a = pq.remove(); | |
b = pq.remove(); | |
n = node(null, a.freq + b.freq, a, b); | |
a.parent = b.parent = n; | |
pq.insert(n); | |
} | |
// Return the root node. | |
return pq.remove(); | |
} | |
// Create and populate a priority queue | |
// with nodes from the frequency table. | |
function nodeQueue(ft) { | |
var k, pq, nodeList = []; | |
for (k in ft) | |
nodeList.push(ft[k]); | |
return new PriorityQueue( | |
function(a, b) { return a.freq < b.freq; }) | |
.init(nodeList); | |
} | |
// Find the next node during decode. | |
function find(bit, obj) { | |
return bit === '1' ? obj.right : obj.left; | |
} | |
// A heap-based priority queue. | |
function PriorityQueue(compare) { | |
var compare = compare || function(a, b) { return a < b; }, | |
heap = [], | |
slice = Array.prototype.slice; | |
// Initialize the priority queue with an array of items. | |
this.init = function(items) { | |
if (heap.length) heap = []; | |
this.insert.apply(null, items); | |
return this; | |
}; | |
// Insert one or more items into the priority queue. | |
this.insert = function() { | |
slice.call(arguments, 0).forEach(function(item) { | |
heap.push(item); | |
float(heap.length - 1); | |
}); | |
return this; | |
}; | |
// Remove and return the next queue element. | |
this.remove = function() { | |
var head = null, last; | |
if (heap.length) { | |
head = heap[0]; | |
last = heap.length - 1; | |
swap(0, last); | |
heap.splice(last, 1); | |
sink(0); | |
} | |
return head; | |
}; | |
// Clear all items from the queue. | |
this.clear = function() { | |
heap = []; | |
return this; | |
}; | |
// Return the top item without modifying the queue. | |
this.peek = function() { | |
return heap[0]; | |
}; | |
// Return the number of items in the queue. | |
this.size = function() { | |
return heap.length; | |
}; | |
// Return true if the queue is empty. | |
this.empty = function() { | |
return !this.length; | |
}; | |
// Sink an item from the top down to heap order. | |
function sink(h) { | |
var left = 2*h+1, right = 2*h+2, child = left; | |
if (left < heap.length) { | |
right < heap.length && compare(heap[right], heap[left]) && child++; | |
if (compare(heap[child], heap[h])) { | |
swap(h, child); | |
sink(child); | |
} | |
} | |
} | |
// Float an item from the bottom up to heap order. | |
function float(h) { | |
var parent = Math.floor((h-1)/2); | |
if (parent >= 0 && compare(heap[h], heap[parent])) { | |
swap(h, parent); | |
float(parent); | |
} | |
} | |
// Swap two heap elements by index. | |
function swap(i, j) { | |
var tmp = heap[i]; | |
heap[i] = heap[j]; | |
heap[j] = tmp; | |
} | |
} | |
} | |
// This is the string to be compressed. | |
var di = "We hold these truths to be self-evident, that all men are \ | |
created equal, that they are endowed by their Creator with certain \ | |
unalienable Rights, that among these are Life, Liberty and the \ | |
pursuit of Happiness.", | |
// Initialize the codec. | |
huff = huffman(di), en; | |
// Encode and decode. | |
console.log('encoded bits:', en = huff.encode()); | |
console.log('decoded string:', huff.decode(en)); | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment