Skip to content

Instantly share code, notes, and snippets.

@Geograph-us
Last active May 10, 2022 03:12
Show Gist options
  • Save Geograph-us/a1beae713c337243c478cb5575a22f75 to your computer and use it in GitHub Desktop.
Save Geograph-us/a1beae713c337243c478cb5575a22f75 to your computer and use it in GitHub Desktop.
Compression by PPM + Arithmetic Coder C# implementation (based on C++ algorithm from the book "Data Compression Methods")
//The context compressor, author M.Smirnov.
//The source also can be found at
//http://compression.ru/
//usage:
//c infile outfile //encoding
//d infile outfile //decoding
/*
* Контекстный компрессор Dummy, автор М.Смирнов.
* Исходный текст этой программы также можно найти на сайо
* http://compression.ru/
* Применение:
* c infile outfile - закодировать infile в outfile
* d infile outfile - декодировать infile в outfile
*/
// Арифметическое кодирование Субботина
// Реализация range-кодера, автор Е.Шелвин
// http://www.pilabs.org.ua/sh/aridemo6.zip
const uint Top = 1 << 24;
static readonly RangeCoder ArithmeticCoder = new RangeCoder();
public class RangeCoder
{
private uint code, range, ffNum, cache;
private long low;
private Stream stream;
public void StartEncode(Stream output)
{
low = ffNum = cache = 0;
range = uint.MaxValue;
this.stream = output;
}
public void StartDecode(Stream input)
{
code = 0;
range = uint.MaxValue;
this.stream = input;
for (var _ = 0; _ < 5; _++)
{
code = (code << 8) | (uint)stream.ReadByte();
}
}
public void FinishEncode()
{
low += 1;
for (var _ = 0; _ < 5; _++)
{
ShiftLow();
}
}
//public void FinishDecode() { }
public void Encode(uint cumFreq, uint freq, uint totFreq)
{
low += cumFreq * (range /= totFreq);
range *= freq;
while (range < Top)
{
ShiftLow();
range <<= 8;
}
}
private void ShiftLow()
{
if ((low >> 24) != 0xFF)
{
stream.WriteByte((byte)(cache + (low >> 32)));
var c = 0xFF + (int)(low >> 32);
while (ffNum > 0)
{
stream.WriteByte((byte)c);
ffNum--;
}
cache = (uint)low >> 24;
}
else
ffNum++;
low = (uint)low << 8;
}
public uint GetFreq(uint totFreq)
{
return code / (range /= totFreq);
}
public void DecodeUpdate(uint cumFreq, uint freq, uint totFreq)
{
code -= cumFreq * range;
range *= freq;
while (range < Top)
{
code = (code << 8) | (uint)stream.ReadByte();
range <<= 8;
}
}
}
public class ContextModel
{
public int Esc;
public int TotFr;
public int[] Count = new int[256];
};
static readonly ContextModel[] cm = new ContextModel[257];
static readonly ContextModel[] Stack = new ContextModel[2];
static readonly int[] Context = new int[1];
static int SP;
const int MaxTotFr = 0x3fff;
static void InitModel()
{
for (var i = 0; i < cm.Length; i++) cm[i] = new ContextModel();
for (var i = 0; i < Stack.Length; i++) Stack[i] = new ContextModel();
for (var j = 0; j < 256; j++) cm[256].Count[j] = 1;
cm[256].TotFr = 256;
cm[256].Esc = 1;
Context[0] = SP = 0;
}
static int EncodeSym(ContextModel CM, int c)
{
Stack[SP++] = CM;
if (CM.Count[c] > 0)
{
var cumFreqUnder = 0;
for (var i = 0; i < c; i++) cumFreqUnder += CM.Count[i];
ArithmeticCoder.Encode((uint)cumFreqUnder, (uint)CM.Count[c], (uint)(CM.TotFr + CM.Esc));
return 1;
}
else
{
if (CM.Esc > 0)
{
ArithmeticCoder.Encode((uint)CM.TotFr, (uint)CM.Esc, (uint)(CM.TotFr + CM.Esc));
}
return 0;
}
}
static int DecodeSym(ContextModel CM)
{
Stack[SP++] = CM;
if (CM.Esc == 0) return -1;
var cumFreq = (int)ArithmeticCoder.GetFreq((uint)(CM.TotFr + CM.Esc));
if (cumFreq < CM.TotFr)
{
var cumFreqUnder = 0;
var i = 0;
while (true)
{
if ((cumFreqUnder + CM.Count[i]) <= cumFreq) cumFreqUnder += CM.Count[i];
else break;
i++;
}
ArithmeticCoder.DecodeUpdate((uint)cumFreqUnder, (uint)CM.Count[i], (uint)(CM.TotFr + CM.Esc));
return i;
}
else
{
ArithmeticCoder.DecodeUpdate((uint)CM.TotFr, (uint)CM.Esc, (uint)(CM.TotFr + CM.Esc));
return -1;
}
}
static void Rescale(ContextModel CM)
{
CM.TotFr = 0;
for (int i = 0; i < 256; i++)
{
CM.Count[i] -= CM.Count[i] >> 1;
CM.TotFr += CM.Count[i];
}
}
static void UpdateModel(int c)
{
while (SP > 0)
{
SP--;
if (Stack[SP].TotFr >= MaxTotFr) Rescale(Stack[SP]);
Stack[SP].TotFr += 1;
if (Stack[SP].Count[c] == 0) Stack[SP].Esc += 1;
Stack[SP].Count[c] += 1;
}
}
static void Encode(Stream dataFile, Stream compressedFile)
{
var c = 0;
InitModel();
ArithmeticCoder.StartEncode(compressedFile);
while ((c = dataFile.ReadByte()) != -1)
{
var success = EncodeSym(cm[Context[0]], c);
if (success == 0) EncodeSym(cm[256], c);
UpdateModel(c);
Context[0] = c;
}
ArithmeticCoder.Encode((uint)cm[Context[0]].TotFr, (uint)cm[Context[0]].Esc, (uint)(cm[Context[0]].TotFr + cm[Context[0]].Esc));
ArithmeticCoder.Encode((uint)cm[256].TotFr, (uint)cm[256].Esc, (uint)(cm[256].TotFr + cm[256].Esc));
ArithmeticCoder.FinishEncode();
}
static void Decode(Stream compressedFile, Stream dataFile)
{
InitModel();
ArithmeticCoder.StartDecode(compressedFile);
while (true)
{
var c = DecodeSym(cm[Context[0]]);
if (c == -1)
{
c = DecodeSym(cm[256]);
if (c == -1) break;
}
UpdateModel(c);
Context[0] = c;
dataFile.WriteByte((byte)c);
}
}
static long GetFileSize(string fileName)
{
return new FileInfo(fileName).Length;
}
static void Main(string[] args)
{
if (args.Length != 3)
{
Console.WriteLine("Context compressor, author M.Smirnov.");
Console.WriteLine("Usage for compression : c infile outfile");
Console.WriteLine("Usage for decompression: d infile outfile");
return;
}
if (args[0][0] == 'c')
{
var sw = Stopwatch.StartNew();
using (var dataFile = File.Open(args[1], FileMode.Open))
using (var compressedFile = File.Open(args[2], FileMode.Create))
{
Encode(dataFile, compressedFile);
}
var dataSize = GetFileSize(args[1]);
var resultSize = GetFileSize(args[2]);
Console.WriteLine($"{"Original size",-45}{dataSize,10} bytes");
Console.WriteLine($"{"Actual encoded size",-45}{resultSize,10} bytes");
Console.WriteLine();
Console.WriteLine($"{"Time for encoding",-50}{sw.Elapsed.TotalSeconds:N3} sec.");
var ratio = (double)resultSize / dataSize;
Console.WriteLine($"{"Compression ratio",-50}{ratio:N3} of original size.");
}
else if (args[0][0] == 'd')
{
Console.WriteLine($"{"Compressed file size",-45}{GetFileSize(args[1]),10} bytes");
var sw = Stopwatch.StartNew();
using (var compressedFile = File.Open(args[1], FileMode.Open))
using (var dataFile = File.Open(args[2], FileMode.Create))
{
Decode(compressedFile, dataFile);
}
Console.WriteLine($"{"Decompression is ended, time is",-50}{sw.Elapsed.TotalSeconds:N3} sec.");
}
else
{
Console.WriteLine("Misformatted command.");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment