Created
February 25, 2011 14:55
-
Star
(143)
You must be signed in to star a gist -
Fork
(19)
You must be signed in to fork a gist
-
-
Save revolunet/843889 to your computer and use it in GitHub Desktop.
LZW javascript compress/decompress
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// LZW-compress a string | |
function lzw_encode(s) { | |
var dict = {}; | |
var data = (s + "").split(""); | |
var out = []; | |
var currChar; | |
var phrase = data[0]; | |
var code = 256; | |
for (var i=1; i<data.length; i++) { | |
currChar=data[i]; | |
if (dict[phrase + currChar] != null) { | |
phrase += currChar; | |
} | |
else { | |
out.push(phrase.length > 1 ? dict[phrase] : phrase.charCodeAt(0)); | |
dict[phrase + currChar] = code; | |
code++; | |
phrase=currChar; | |
} | |
} | |
out.push(phrase.length > 1 ? dict[phrase] : phrase.charCodeAt(0)); | |
for (var i=0; i<out.length; i++) { | |
out[i] = String.fromCharCode(out[i]); | |
} | |
return out.join(""); | |
} | |
// Decompress an LZW-encoded string | |
function lzw_decode(s) { | |
var dict = {}; | |
var data = (s + "").split(""); | |
var currChar = data[0]; | |
var oldPhrase = currChar; | |
var out = [currChar]; | |
var code = 256; | |
var phrase; | |
for (var i=1; i<data.length; i++) { | |
var currCode = data[i].charCodeAt(0); | |
if (currCode < 256) { | |
phrase = data[i]; | |
} | |
else { | |
phrase = dict[currCode] ? dict[currCode] : (oldPhrase + currChar); | |
} | |
out.push(phrase); | |
currChar = phrase.charAt(0); | |
dict[code] = oldPhrase + currChar; | |
code++; | |
oldPhrase = phrase; | |
} | |
return out.join(""); | |
} |
Awesome code, under which license have you released this under?
Thanks & keep on rock'n.
this code has been took from somewhere and i dont remember where or why... i'm not the author of this code
A version designed for long string storage in UTF-8 encoded files using a base64 encoding. Works with unicode (including surrogates).
https://github.com/antonylesuisse/lzwjs/blob/master/lzw.js
function lzw64_encode(s) {
if (!s) return s;
var b64="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
var d=new Map();
var s=unescape(encodeURIComponent(s)).split("");
var word=s[0];
var num=256;
var key;
var o=[];
function out(word,num) {
key=word.length>1 ? d.get(word) : word.charCodeAt(0);
o.push(b64[key&0x3f]);
o.push(b64[(key>>6)&0x3f]);
o.push(b64[(key>>12)&0x3f]);
}
for (var i=1; i<s.length; i++) {
var c=s[i];
if (d.has(word+c)) {
word+=c;
} else {
d.set(word+c,num++);
out(word,num);
word=c;
if(num==(1<<18)-1) {
d.clear();
num=256;
}
}
}
out(word);
return o.join("");
}
function lzw64_decode(s) {
var b64="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
var b64d={};
for(var i=0; i<64; i++){
b64d[b64.charAt(i)]=i;
}
var d=new Map();
var num=256;
var word=String.fromCharCode(b64d[s[0]]+(b64d[s[1]]<<6)+(b64d[s[2]]<<12));
var prev=word;
var o=[word];
for(var i=3; i<s.length; i+=3) {
var key=b64d[s[i]]+(b64d[s[i+1]]<<6)+(b64d[s[i+2]]<<12);
word=key<256 ? String.fromCharCode(key) : d.has(key) ? d.get(key) : word+word.charAt(0);
o.push(word);
d.set(num++, prev+word.charAt(0));
prev=word;
if(num==(1<<18)-1) {
d.clear();
num=256;
}
}
return decodeURIComponent(escape(o.join("")));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
My data was UTF-16 and I had a similar problem. Firstly, once the code entries got above #10000 the encoding routine only stored the first 16 bits. So I switched Andrews54757 code above to use codePoint functions to return proper values (surrogate pairs) for codes above #10000. I then in turn had to get the incrementing code values in both routines to jump between $d800 and $e000 to avoid code values in that region that would get incorrectly interpreted as surrogate pairs.
// from https://gist.github.com/revolunet/843889