Skip to content

Instantly share code, notes, and snippets.

@bga
Created August 11, 2010 16:56
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save bga/519305 to your computer and use it in GitHub Desktop.
Save bga/519305 to your computer and use it in GitHub Desktop.
window._gzip = (function()
{
var _matchCount = function(s, word)
{
var n = 0, wordLen = word.length, p = -wordLen;
while((p = s.indexOf(word, p + wordLen)) > -1)
++n;
return n;
};
var _matchCountInDict = function(dict, word)
{
var n = 0, i = dict.length;
while(i--)
{
if(dict[i].word.indexOf(word) > -1)
++n;
}
return n;
};
var _escapeForCString = function(s)
{
return s.replace(/\\/g, '\\\\').
replace(/\'/g, '\\\'').
//replace(/\"/g, '\\\"').
replace(/\r/g, '\\r').
replace(/\0/g, '\\x00').
replace(/\n/g, '\\n');
};
var reEscapeableRE = /[\[\]\(\)\{\}\?\!\:\|\+\.\*\'\"\\]/g;
var _escapeForRegExp = function(s)
{
return s.replace(reEscapeableRE, '\\$&');
};
var _escapeForRegExpReplace = function(s)
{
return s.replace(/\$/g, '\\$$');
};
var _removeEscapebleChars = function(a)
{
delete a['^'];
delete a['\"'];
delete a['\''];
delete a['\\'];
delete a['$'];
delete a['['];
delete a[']'];
delete a['('];
delete a[')'];
delete a['{'];
delete a['}'];
delete a['?'];
delete a['!'];
delete a[':'];
delete a['|'];
delete a['+'];
delete a['`'];
delete a['.'];
delete a['*'];
};
var _dumpDict = function(dict)
{
var t = '';
var d, i = -1; while((d = dict[++i]))
t += d.word + '--';
return t;
};
//var unpackCode = 'for(var s=\'__CODE__\',d=\'__DICT__\'.split("`"),i=0;v=d[i++];)s=s.replace(RegExp(v[0],"g"),v.slice(1));console.log(s)';
var unpackCode = 'for(var s=\'__CODE__\',d=\'__DICT__\'.split("`"),i=0;v=d[i++];)s=s.replace(RegExp(v[0],"g"),v.slice(1));eval(s)';
/**
@fn pack and wrap to <unpackCode> code <s> using gzip like algo (dictonaty based)
@param allowNonAcsiiChars {Boolean} {Default = true} true if you allow use 32-256 chars range else 32-127.
@param minProfit {Number} {Default = 0} threshold in bytes for each replace pair
@return packed and wrapped to <unpackCode> code
*/
var _gzip = function(s, allowNonAcsiiChars, minProfit)
{
if(allowNonAcsiiChars == null)
allowNonAcsiiChars = true;
if(minProfit == null)
minProfit = 1;
var availableCharMap = {};
var i = 31, e = (allowNonAcsiiChars) ? 256 : 127;
// gen all available char map
while(++i < e)
availableCharMap[String.fromCharCode(i)] = 0;
// and remove some chars which require escaping (in RegExp too)
_removeEscapebleChars(availableCharMap);
// reduce availableCharMap - remove chars which used in <s>
i = -1, e = s.length; while(++i < e)
{
delete availableCharMap[s.charAt(i)];
}
// convert <allowNonAcsiiChars> to <availableChars>
var availableChars = [], j = 0;
for(var i in availableCharMap)
{
if(availableCharMap.hasOwnProperty(i))
availableChars[j++] = i;
}
//console.log(availableChars.join('> <'));
//console.log(availableChars);
var sLen = s.length;
var
dict =
[
{word: '\'', matchCount: _matchCount(s, '\'') },
{word: '\n', matchCount: _matchCount(s, '\n') },
{word: '\\', matchCount: _matchCount(s, '\\') },
{word: '\r', matchCount: _matchCount(s, '\r') }
],
dictLen = 0;
var dictWordMap = {};
// build <dict>
i = -1, e = sLen - 2; while(++i < e)
{
var k = i + 1;
while(++k < sLen && (s.slice(i, k) in dictWordMap))
;
var word, j = sLen;
while((j = s.lastIndexOf((word = s.slice(i, k)), j)) > k)
{
var m = j;
j += k - i;
do
{
dictWordMap[word] = 0;
var len = k - i;
var n = 2, p = -len;
var t = s.slice(k, m);
while((p = t.indexOf(word, p + len)) > -1)
++n;
dict[dictLen++] = {word: word, matchCount: n};
word = s.slice(i, ++k);
}
while(++j < sLen && s.charAt(k - 1) == s.charAt(j - 1))
}
}
//return;
// word will be replaced using RegExp - prepare escaped vestion
var d, i = -1; while((d = dict[++i]))
d.escapedWord = _escapeForRegExp(d.word);
// calc profit of each word in <dict> and remove words with profit < <minProfit>
var _escapeForCStringC = _escapeForCString;
var d, i = -1, j = -1; while((d = dict[++i]))
{
var escapedWordLen = _escapeForCStringC(d.word.replace(/\$/g, '\\$$')).length;
var profit = d.matchCount*(d.word.length - 1)- escapedWordLen - 1 - 1;
if(profit > minProfit)
{
d.profit = profit;
dict[++j] = d;
}
}
dict.length = dictLen = j + 1;
//console.log(_dumpDict(dict));
// and sort <dict> from last to top by profit and word length
dict.sort(function(a, b){ return (b.profit - a.profit) || (b.word.length - a.word.length); });
// assign replaceChar, replace words to replaceChars and remove unused words
var t = s, availableCharsLen = availableChars.length;
var d, i = -1, j = 0; while((d = dict[++i]) && j < availableCharsLen)
{
if(t.indexOf(d.word) > -1)
{
d.replaceChar = availableChars[j];
t = t.replace(new RegExp(d.escapedWord, 'g'), d.replaceChar);
dict[j] = d;
j++;
}
}
dict.length = dictLen = j;
//console.log(dict);
//return;
//console.log('t = ', t);
// compress <dict>
// calc match one word in other words
var d, i = -1; while((d = dict[++i]))
d.matchCountInDict = _matchCountInDict(dict, d.word);
// sort <dict> by matchCountInDict from max to min
dict.sort(function(a, b){ return a.matchCountInDict - b.matchCountInDict; });
// redice <dict>
var d, i = -1; while((d = dict[++i]))
{
var word = d.word;
var j = i, d2; while((d2 = dict[++i]))
word = word.replace(new RegExp(d2.escapedWord, 'g'), d2.replaceChar);
d.word = word;
}
//console.log(_dumpDict(dict));
//console.log(dict);
//return;
// final
// serialize <dict>
var dictString = '';
var d, i = -1; while((d = dict[++i]))
dictString += d.replaceChar + _escapeForRegExpReplace(d.word) + '`';
dictString = dictString.slice(0, -1);
//console.log(dictString);
// and create unpack code
var code = unpackCode.
replace('__DICT__', _escapeForCString(dictString)).
replace('__CODE__', _escapeForCString(t))
;
console.log('code.length = ', code.length);
console.log('s.length = ', s.length);
console.log('profit = ', (s.length/code.length*100).toFixed(2), '%');
return code;
};
return _gzip;
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment