Skip to content

Instantly share code, notes, and snippets.

@jmcd
Created March 8, 2019 13:03
Show Gist options
  • Save jmcd/c66b4146a49e31b525b866324e27adbb to your computer and use it in GitHub Desktop.
Save jmcd/c66b4146a49e31b525b866324e27adbb to your computer and use it in GitHub Desktop.
LZ77Compressor
using System;
using System.Collections.Generic;
using System.Linq;
namespace LZ77
{
public static class Compressor
{
// it's impossible for a pointer to start with this because that would mean a length of 1
private const int Escape = 0x1;
private static bool DoMatch(byte[] input, int i, int j, int length)
{
for (var offset = 0; offset < length; offset++)
{
if (input[i + offset] != input[j + offset])
{
return false;
}
}
return true;
}
private static (int jump, int length)? Find(byte[] input, int searchTargetMaxLen, int minSearchIndex, int i)
{
(int jump, int length)? best = null;
for (var searchIndex = minSearchIndex; searchIndex < i - searchTargetMaxLen; searchIndex++)
{
var matchedLen = 0;
// pointer is 2 bytes + escape byte, so no point matching less than 4
for (var searchTargetLen = 4; searchTargetLen <= searchTargetMaxLen; searchTargetLen++)
{
var found = DoMatch(input, searchIndex, i, searchTargetLen);
if (!found)
{
break;
}
matchedLen = searchTargetLen;
}
if (matchedLen <= 0)
{
continue;
}
var jump = i - searchIndex;
if (best.HasValue)
{
if (best.Value.length < matchedLen)
{
best = (jump, matchedLen);
}
}
else
{
best = (jump, matchedLen);
}
}
return best;
}
public static byte[] Compress(byte[] input)
{
var output = new List<byte>(input.Length);
for (var i = 0; i < input.Length;)
{
var remainingInputLength = input.Length - i;
var minSearchIndex = Math.Max(0, i - PointerMaxJump);
var searchTargetMaxLen = new[]
{
PointerMaxLength,
remainingInputLength,
i,
}.Min();
var pointer = Find(input, searchTargetMaxLen, minSearchIndex, i);
if (pointer.HasValue)
{
var (lhs, rhs) = PointerToBytes(pointer.Value.jump, pointer.Value.length);
output.Add(Escape);
output.Add(lhs);
output.Add(rhs);
i += pointer.Value.length;
}
else
{
output.Add(input[i]);
// If actually writing a 1, then write another 1 after as anti-escape
if (input[i] == Escape)
{
output.Add(Escape);
}
i += 1;
}
}
return output.ToArray();
}
public static byte[] Decompress(byte[] compressed)
{
var output = new List<byte>();
for (var i = 0; i < compressed.Length;)
{
var b = compressed[i];
var isPointer = b == 0x1 && (i + 2) < compressed.Length && compressed[i + 1] != 0x1;
if (isPointer)
{
var pointer = PointerFromBytes(compressed[i + 1], compressed[i + 2]);
var offset = output.Count - pointer.jump;
for (var j = 0; j < pointer.length; j++)
{
var index = offset + j;
var z = output[index];
output.Add(z);
}
i += 3;
}
else
{
output.Add(b);
i += b == 0x1 ? 2 : 1;
}
}
return output.ToArray();
}
/**
* Pointer is jump and length, spread over two bytes
*
* LHS RHS
* 1 1
* 2431 2431
* 84268421 84268421
* JmsbLeng Jlsb....
*/
private const int PointerMaxJump = 0xfff;
private const int PointerMaxLength = 0xf;
private static (byte lhs, byte rhs) PointerToBytes(int jump, int length)
{
var i = jump & 0xf00;
var l = (i >> 4);
var lhs = (byte) (l | length);
var rhs = (byte) jump;
return (lhs, rhs);
}
private static (int jump, int length) PointerFromBytes(byte lhs, byte rhs)
{
var len = lhs & 0x00f;
var jmpMsb = (lhs & 0xf0) << 4;
var jmpLsb = rhs;
var jmp = jmpMsb | jmpLsb;
return (jmp, len);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment