Skip to content

Instantly share code, notes, and snippets.

@kprotty
Last active January 2, 2024 11:14
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kprotty/08e4fefaa73186da3b573c1395689354 to your computer and use it in GitHub Desktop.
Save kprotty/08e4fefaa73186da3b573c1395689354 to your computer and use it in GitHub Desktop.
Simple LZ4 block enc/dec in 100 LOC
const assert = @import("std").debug.assert;
fn compressBlock(writer: anytype, src: []const u8) !void {
var table = [_]u32{0} ** 4096; // size is pow2. bump to match more. ideal = (0xffff+1) / sizeof(u32)
var anchor: u32 = 0;
if (src.len > 12) { // LZ4 spec restriction: last match must start 12b before end of block.
var pos: u32 = 0;
while (pos + 4 < src.len - 5) { // LZ4 spec restriction: last 5b are always literal.
const blk: u32 = @bitCast(src[pos..][0..4].*);
const hash = (blk *% 0x9e3779b9) >> (32 - @ctz(table.len));
const match = table[hash] -% 1;
table[hash] = pos +% 1;
const offset = pos -% match;
if (match == ~@as(u32, 0) or offset > 0xffff or blk != @as(u32, @bitCast(src[match..][0..4].*))) {
pos += 1;
continue;
}
var matches: u32 = 4;
while (pos + matches < src.len - 5 and src[pos + matches] == src[match + matches])
matches += 1;
const literals = pos - anchor;
pos += matches;
matches -= 4;
const token = ((if (literals < 0xf) literals else 0xf) << 4) | (if (matches < 0xf) matches else 0xf);
try writer.writeByte(@intCast(token));
if (literals >= 0xf) {
var n = literals - 0xf;
while (n >= 0xff) : (n -= 0xff) try writer.writeByte(0xff);
try writer.writeByte(@intCast(n));
}
try writer.writeAll(src[anchor..][0..literals]);
anchor = pos;
try writer.writeInt(u16, @intCast(offset), .little);
if (matches >= 0xf) {
var n = matches - 0xf;
while (n >= 0xff) : (n -= 0xff) try writer.writeByte(0xff);
try writer.writeByte(@intCast(n));
}
}
}
const literals = src.len - anchor;
if (anchor == 0) return error.Uncompressable;
try writer.writeByte(@intCast((if (literals < 0xf) literals else 0xf) << 4));
if (literals >= 0xf) {
var n = literals - 0xf;
while (n >= 0xff) : (n -= 0xff) try writer.writeByte(0xff);
try writer.writeByte(@intCast(n));
}
assert(anchor < src.len);
try writer.writeAll(src[anchor..]);
}
fn decompressBlock(dst: []u8, reader: anytype) ![]u8 {
var pos: u32 = 0;
while (true) {
const token: u32 = try reader.readByte();
assert(pos < dst.len);
var literals = token >> 4;
while (literals >= 0xf) {
const n = try reader.readByte();
literals += n;
if (n != 0xff) break;
}
try reader.readNoEof(dst[pos..][0..literals]);
pos += literals;
var matches = token & 0xf;
var offset: u32 = reader.readInt(u16, .little) catch |err| switch (err) {
error.EndOfStream => { // last literals token has no offset.
assert(matches == 0);
return dst[0..pos];
},
else => |e| return e,
};
assert(offset > 0);
offset = pos - offset;
while (matches >= 0xf) {
const n = try reader.readByte();
matches += n;
if (n != 0xff) break;
}
matches += 4;
for (0..matches) |i| dst[pos + i] = dst[offset + i]; // TODO: optimize this.
pos += matches;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment