Skip to content

Instantly share code, notes, and snippets.

@revolunet
Created February 25, 2011 14:55
Show Gist options
  • Save revolunet/843889 to your computer and use it in GitHub Desktop.
Save revolunet/843889 to your computer and use it in GitHub Desktop.
LZW javascript compress/decompress
// LZW-compress a string
function lzw_encode(s) {
var dict = {};
var data = (s + "").split("");
var out = [];
var currChar;
var phrase = data[0];
var code = 256;
for (var i=1; i<data.length; i++) {
currChar=data[i];
if (dict[phrase + currChar] != null) {
phrase += currChar;
}
else {
out.push(phrase.length > 1 ? dict[phrase] : phrase.charCodeAt(0));
dict[phrase + currChar] = code;
code++;
phrase=currChar;
}
}
out.push(phrase.length > 1 ? dict[phrase] : phrase.charCodeAt(0));
for (var i=0; i<out.length; i++) {
out[i] = String.fromCharCode(out[i]);
}
return out.join("");
}
// Decompress an LZW-encoded string
function lzw_decode(s) {
var dict = {};
var data = (s + "").split("");
var currChar = data[0];
var oldPhrase = currChar;
var out = [currChar];
var code = 256;
var phrase;
for (var i=1; i<data.length; i++) {
var currCode = data[i].charCodeAt(0);
if (currCode < 256) {
phrase = data[i];
}
else {
phrase = dict[currCode] ? dict[currCode] : (oldPhrase + currChar);
}
out.push(phrase);
currChar = phrase.charAt(0);
dict[code] = oldPhrase + currChar;
code++;
oldPhrase = phrase;
}
return out.join("");
}
@antonylesuisse
Copy link

antonylesuisse commented Apr 6, 2021

A version designed for long string storage in UTF-8 encoded files using a base64 encoding. Works with unicode (including surrogates).

https://github.com/antonylesuisse/lzwjs/blob/master/lzw.js

function lzw64_encode(s) {
    if (!s) return s;
    var b64="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
    var d=new Map();
    var s=unescape(encodeURIComponent(s)).split("");
    var word=s[0];
    var num=256;
    var key;
    var o=[];
    function out(word,num) {
        key=word.length>1 ? d.get(word) : word.charCodeAt(0);
        o.push(b64[key&0x3f]);
        o.push(b64[(key>>6)&0x3f]);
        o.push(b64[(key>>12)&0x3f]);
    }
    for (var i=1; i<s.length; i++) {
        var c=s[i];
        if (d.has(word+c)) {
            word+=c;
        } else {
            d.set(word+c,num++);
            out(word,num);
            word=c;
            if(num==(1<<18)-1) {
                d.clear();
                num=256;
            }
        }
    }
    out(word);
    return o.join("");
}

function lzw64_decode(s) {
    var b64="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
    var b64d={};
    for(var i=0; i<64; i++){
        b64d[b64.charAt(i)]=i;
    }
    var d=new Map();
    var num=256;
    var word=String.fromCharCode(b64d[s[0]]+(b64d[s[1]]<<6)+(b64d[s[2]]<<12));
    var prev=word;
    var o=[word];
    for(var i=3; i<s.length; i+=3) {
        var key=b64d[s[i]]+(b64d[s[i+1]]<<6)+(b64d[s[i+2]]<<12);
        word=key<256 ? String.fromCharCode(key) : d.has(key) ? d.get(key) : word+word.charAt(0);
        o.push(word);
        d.set(num++, prev+word.charAt(0));
        prev=word;
        if(num==(1<<18)-1) {
            d.clear();
            num=256;
        }
    }
    return decodeURIComponent(escape(o.join("")));
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment