Skip to content

Instantly share code, notes, and snippets.

@hdorio
Last active January 13, 2022 06:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hdorio/17332012563cfa022622d26ef63b095b to your computer and use it in GitHub Desktop.
Save hdorio/17332012563cfa022622d26ef63b095b to your computer and use it in GitHub Desktop.
number of lines
python -c "print($(grep -v "}\$" fast.zig | wc -l) - $(grep -v "}\$" small.zig | wc -l))"
// 259
const std = @import("std");
const io = std.io;
const math = std.math;
const mem = std.mem;
const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;
const bu = struct {
pub fn bitReverse(comptime T: type, value: T, N: usize) T {
const r = @bitReverse(T, value);
return r >> @intCast(math.Log2Int(T), @typeInfo(T).Int.bits - N);
}
};
const ddec = struct {
pub const DictDecoder = struct {
const Self = @This();
allocator: Allocator = undefined,
hist: []u8 = undefined, // Sliding window history
wr_pos: u32 = 0, // Current output position in buffer
rd_pos: u32 = 0, // Have emitted hist[0..rd_pos] already
full: bool = false, // Has a full window length been written yet?
pub fn init(self: *Self, allocator: Allocator, size: u32, dict: ?[]const u8) !void {
self.allocator = allocator;
self.hist = try allocator.alloc(u8, size);
self.wr_pos = 0;
if (dict != null) {
mem.copy(u8, self.hist, dict.?[dict.?.len -| self.hist.len..]);
self.wr_pos = @intCast(u32, dict.?.len);
}
if (self.wr_pos == self.hist.len) {
self.wr_pos = 0;
self.full = true;
}
self.rd_pos = self.wr_pos;
}
pub fn deinit(self: *Self) void {
self.allocator.free(self.hist);
}
pub fn histSize(self: *Self) u32 {
if (self.full) {
return @intCast(u32, self.hist.len);
}
return self.wr_pos;
}
pub fn availRead(self: *Self) u32 {
return self.wr_pos - self.rd_pos;
}
pub fn availWrite(self: *Self) u32 {
return @intCast(u32, self.hist.len - self.wr_pos);
}
pub fn writeSlice(self: *Self) []u8 {
return self.hist[self.wr_pos..];
}
pub fn writeMark(self: *Self, count: u32) void {
self.wr_pos += count;
}
pub fn writeByte(self: *Self, byte: u8) void {
self.hist[self.wr_pos] = byte;
self.wr_pos += 1;
}
fn copy(dst: []u8, src: []const u8) u32 {
if (src.len > dst.len) {
mem.copy(u8, dst, src[0..dst.len]);
return @intCast(u32, dst.len);
}
mem.copy(u8, dst, src);
return @intCast(u32, src.len);
}
pub fn writeCopy(self: *Self, dist: u32, length: u32) u32 {
var dst_base = self.wr_pos;
var dst_pos = dst_base;
var src_pos: i32 = @intCast(i32, dst_pos) - @intCast(i32, dist);
var end_pos = dst_pos + length;
if (end_pos > self.hist.len) {
end_pos = @intCast(u32, self.hist.len);
}
if (src_pos < 0) {
src_pos += @intCast(i32, self.hist.len);
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@intCast(usize, src_pos)..]);
src_pos = 0;
}
while (dst_pos < end_pos) {
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@intCast(usize, src_pos)..dst_pos]);
}
self.wr_pos = dst_pos;
return dst_pos - dst_base;
}
pub fn tryWriteCopy(self: *Self, dist: u32, length: u32) u32 {
var dst_pos = self.wr_pos;
var end_pos = dst_pos + length;
if (dst_pos < dist or end_pos > self.hist.len) {
return 0;
}
var dst_base = dst_pos;
var src_pos = dst_pos - dist;
while (dst_pos < end_pos) {
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[src_pos..dst_pos]);
}
self.wr_pos = dst_pos;
return dst_pos - dst_base;
}
pub fn readFlush(self: *Self) []u8 {
var to_read = self.hist[self.rd_pos..self.wr_pos];
self.rd_pos = self.wr_pos;
if (self.wr_pos == self.hist.len) {
self.wr_pos = 0;
self.rd_pos = 0;
self.full = true;
}
return to_read;
}
};
};
const deflate_const = struct {
pub const max_store_block_size = 65535;
pub const end_block_marker = 256;
pub const base_match_length = 3;
pub const base_match_offset = 1;
pub const max_match_length = 258;
pub const max_match_offset = 1 << 15;
pub const offset_code_count = 30;
pub const max_num_frequencies = max_num_lit;
};
const mu = struct {
pub fn copy(dst: []u8, src: []const u8) usize {
if (dst.len <= src.len) {
mem.copy(u8, dst[0..], src[0..dst.len]);
} else {
mem.copy(u8, dst[0..src.len], src[0..]);
}
return math.min(dst.len, src.len);
}
};
const max_match_offset = deflate_const.max_match_offset;
const end_block_marker = deflate_const.end_block_marker;
const max_code_len = 16;
const max_num_lit = 286;
const max_num_dist = 30;
const num_codes = 19;
var corrupt_input_error_offset: u64 = undefined;
const InflateError = error{
CorruptInput,
BadInternalState,
BadReaderState,
UnexpectedEndOfStream,
EndOfStreamWithNoError,
};
const huffman_chunk_bits = 9;
const huffman_num_chunks = 1 << huffman_chunk_bits;
const huffman_count_mask = 15;
const huffman_value_shift = 4;
const HuffmanDecoder = struct {
const Self = @This();
allocator: Allocator = undefined,
min: u32 = 0,
chunks: [huffman_num_chunks]u32 = [1]u32{0} ** huffman_num_chunks,
links: [][]u32 = undefined,
link_mask: u32 = 0,
initialized: bool = false,
sub_links: ArrayList(u32) = undefined,
fn init(self: *Self, allocator: Allocator, lengths: []u32) !bool {
const sanity = false;
self.deinit();
if (self.min != 0) {
self.* = HuffmanDecoder{};
}
self.allocator = allocator;
var count: [max_code_len]u32 = [1]u32{0} ** max_code_len;
var min: u32 = 0;
var max: u32 = 0;
for (lengths) |n| {
if (n == 0) {
continue;
}
if (min == 0 or n < min) {
min = n;
}
if (n > max) {
max = n;
}
count[n] += 1;
}
if (max == 0) {
return true;
}
var next_code: [max_code_len]u32 = [1]u32{0} ** max_code_len;
var code: u32 = 0;
{
var i = min;
while (i <= max) : (i += 1) {
code <<= 1;
next_code[i] = code;
code += count[i];
}
}
if (code != @as(u32, 1) << @intCast(u5, max) and !(code == 1 and max == 1)) {
return false;
}
self.min = min;
if (max > huffman_chunk_bits) {
var numLinks = @as(u32, 1) << @intCast(u5, max - huffman_chunk_bits);
self.link_mask = @intCast(u32, numLinks - 1);
var link = next_code[huffman_chunk_bits + 1] >> 1;
self.links = try self.allocator.alloc([]u32, huffman_num_chunks - link);
self.sub_links = ArrayList(u32).init(self.allocator);
self.initialized = true;
var j = @intCast(u32, link);
while (j < huffman_num_chunks) : (j += 1) {
var reverse = @intCast(u32, bu.bitReverse(u16, @intCast(u16, j), 16));
reverse >>= @intCast(u32, 16 - huffman_chunk_bits);
var off = j - @intCast(u32, link);
if (sanity and self.chunks[reverse] != 0) {
unreachable;
}
self.chunks[reverse] = @intCast(u32, off << huffman_value_shift | (huffman_chunk_bits + 1));
self.links[off] = try self.allocator.alloc(u32, numLinks);
try self.sub_links.append(off);
}
}
for (lengths) |n, li| {
if (n == 0) {
continue;
}
var ncode = next_code[n];
next_code[n] += 1;
var chunk = @intCast(u32, (li << huffman_value_shift) | n);
var reverse = @intCast(u32, bu.bitReverse(u16, @intCast(u16, ncode), 16));
reverse >>= @intCast(u5, 16 - n);
if (n <= huffman_chunk_bits) {
var off = reverse;
while (off < self.chunks.len) : (off += @as(u32, 1) << @intCast(u5, n)) {
if (sanity and self.chunks[off] != 0) {
unreachable;
}
self.chunks[off] = chunk;
}
} else {
var j = reverse & (huffman_num_chunks - 1);
if (sanity and self.chunks[j] & huffman_count_mask != huffman_chunk_bits + 1) {
unreachable;
}
var value = self.chunks[j] >> huffman_value_shift;
var link_tab = self.links[value];
reverse >>= huffman_chunk_bits;
var off = reverse;
while (off < link_tab.len) : (off += @as(u32, 1) << @intCast(u5, n - huffman_chunk_bits)) {
if (sanity and link_tab[off] != 0) {
unreachable;
}
link_tab[off] = chunk;
}
}
}
if (sanity) {
for (self.chunks) |chunk, i| {
if (chunk == 0) {
if (code == 1 and i % 2 == 1) {
continue;
}
unreachable;
}
}
for (self.links) |link_tab| {
for (link_tab) |chunk| {
if (chunk == 0) {
unreachable;
}
}
}
}
return true;
}
pub fn deinit(self: *Self) void {
if (self.initialized and self.links.len > 0) {
for (self.sub_links.items) |off| {
self.allocator.free(self.links[off]);
}
self.allocator.free(self.links);
self.sub_links.deinit();
}
}
};
var fixed_huffman_decoder: ?HuffmanDecoder = null;
fn fixedHuffmanDecoderInit(allocator: Allocator) !HuffmanDecoder {
if (fixed_huffman_decoder != null) {
return fixed_huffman_decoder.?;
}
var bits: [288]u32 = undefined;
var i: u32 = 0;
while (i < 144) : (i += 1) {
bits[i] = 8;
}
while (i < 256) : (i += 1) {
bits[i] = 9;
}
while (i < 280) : (i += 1) {
bits[i] = 7;
}
while (i < 288) : (i += 1) {
bits[i] = 8;
}
fixed_huffman_decoder = HuffmanDecoder{};
_ = try fixed_huffman_decoder.?.init(allocator, &bits);
return fixed_huffman_decoder.?;
}
const DecompressorState = enum {
init,
dict,
};
pub fn decompressor(allocator: Allocator, reader: anytype, dictionary: ?[]const u8) !Decompressor(@TypeOf(reader)) {
return Decompressor(@TypeOf(reader)).init(allocator, reader, dictionary);
}
pub fn Decompressor(comptime ReaderType: type) type {
return struct {
const Self = @This();
pub const Error =
ReaderType.Error ||
error{EndOfStream} ||
InflateError ||
Allocator.Error;
pub const Reader = io.Reader(*Self, Error, read);
allocator: Allocator,
inner_reader: ReaderType,
roffset: u64,
b: u32,
nb: u32,
hd1: HuffmanDecoder,
hd2: HuffmanDecoder,
bits: *[max_num_lit + max_num_dist]u32,
codebits: *[num_codes]u32,
dict: ddec.DictDecoder,
buf: [4]u8,
step: fn (*Self) Error!void,
step_state: DecompressorState,
final: bool,
err: ?InflateError,
to_read: []u8,
hl: ?*HuffmanDecoder,
hd: ?*HuffmanDecoder,
copy_len: u32,
copy_dist: u32,
pub fn reader(self: *Self) Reader {
return .{ .context = self };
}
fn init(allocator: Allocator, in_reader: ReaderType, dict: ?[]const u8) !Self {
var dd = ddec.DictDecoder{};
try dd.init(allocator, max_match_offset, dict);
fixed_huffman_decoder = try fixedHuffmanDecoderInit(allocator);
return Self{
.allocator = allocator,
.inner_reader = in_reader,
.roffset = 0,
.b = 0,
.nb = 0,
.hd1 = HuffmanDecoder{},
.hd2 = HuffmanDecoder{},
.bits = try allocator.create([max_num_lit + max_num_dist]u32),
.codebits = try allocator.create([num_codes]u32),
.dict = dd,
.buf = [_]u8{0} ** 4,
.step = nextBlock,
.step_state = .init,
.final = false,
.err = null,
.to_read = &[0]u8{},
.hl = null,
.hd = null,
.copy_len = 0,
.copy_dist = 0,
};
}
pub fn deinit(self: *Self) void {
self.hd1.deinit();
self.hd2.deinit();
self.dict.deinit();
self.allocator.destroy(self.bits);
self.allocator.destroy(self.codebits);
}
fn nextBlock(self: *Self) Error!void {
while (self.nb < 1 + 2) {
self.moreBits() catch |e| {
self.err = e;
return e;
};
}
self.final = self.b & 1 == 1;
self.b >>= 1;
var typ = self.b & 3;
self.b >>= 2;
self.nb -= 1 + 2;
switch (typ) {
0 => try self.dataBlock(),
1 => {
self.hl = &fixed_huffman_decoder.?;
self.hd = null;
try self.huffmanBlock();
},
2 => {
try self.readHuffman();
self.hl = &self.hd1;
self.hd = &self.hd2;
try self.huffmanBlock();
},
else => {
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
},
}
}
pub fn read(self: *Self, b: []u8) Error!usize {
while (true) {
if (self.to_read.len > 0) {
var n = mu.copy(b, self.to_read);
self.to_read = self.to_read[n..];
if (self.to_read.len == 0 and
self.err != null)
{
if (self.err.? == InflateError.EndOfStreamWithNoError) {
return n;
}
return self.err.?;
}
return n;
}
if (self.err != null) {
if (self.err.? == InflateError.EndOfStreamWithNoError) {
return 0;
}
return self.err.?;
}
self.step(self) catch {
if (self.to_read.len == 0) {
self.to_read = self.dict.readFlush();
}
};
}
}
pub fn close(self: *Self) ?Error {
if (self.err == InflateError.EndOfStreamWithNoError) {
return null;
}
return self.err;
}
const code_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
fn readHuffman(self: *Self) Error!void {
while (self.nb < 5 + 5 + 4) {
try self.moreBits();
}
var nlit = @intCast(u32, self.b & 0x1F) + 257;
if (nlit > max_num_lit) {
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
}
self.b >>= 5;
var ndist = @intCast(u32, self.b & 0x1F) + 1;
if (ndist > max_num_dist) {
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
}
self.b >>= 5;
var nclen = @intCast(u32, self.b & 0xF) + 4;
self.b >>= 4;
self.nb -= 5 + 5 + 4;
var i: u32 = 0;
while (i < nclen) : (i += 1) {
while (self.nb < 3) {
try self.moreBits();
}
self.codebits[code_order[i]] = @intCast(u32, self.b & 0x7);
self.b >>= 3;
self.nb -= 3;
}
i = nclen;
while (i < code_order.len) : (i += 1) {
self.codebits[code_order[i]] = 0;
}
if (!try self.hd1.init(self.allocator, self.codebits[0..])) {
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
}
i = 0;
var n = nlit + ndist;
while (i < n) {
var x = try self.huffSym(&self.hd1);
if (x < 16) {
self.bits[i] = x;
i += 1;
continue;
}
var rep: u32 = 0;
var nb: u32 = 0;
var b: u32 = 0;
switch (x) {
16 => {
rep = 3;
nb = 2;
if (i == 0) {
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
}
b = self.bits[i - 1];
},
17 => {
rep = 3;
nb = 3;
b = 0;
},
18 => {
rep = 11;
nb = 7;
b = 0;
},
else => return error.BadInternalState,
}
while (self.nb < nb) {
try self.moreBits();
}
rep += @intCast(u32, self.b & (@as(u32, 1) << @intCast(u5, nb)) - 1);
self.b >>= @intCast(u5, nb);
self.nb -= nb;
if (i + rep > n) {
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
}
var j: u32 = 0;
while (j < rep) : (j += 1) {
self.bits[i] = b;
i += 1;
}
}
if (!try self.hd1.init(self.allocator, self.bits[0..nlit]) or
!try self.hd2.init(self.allocator, self.bits[nlit .. nlit + ndist]))
{
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
}
if (self.hd1.min < self.bits[end_block_marker]) {
self.hd1.min = self.bits[end_block_marker];
}
return;
}
fn huffmanBlock(self: *Self) Error!void {
while (true) {
switch (self.step_state) {
.init => {
var v = try self.huffSym(self.hl.?);
var n: u32 = 0;
var length: u32 = 0;
switch (v) {
0...255 => {
self.dict.writeByte(@intCast(u8, v));
if (self.dict.availWrite() == 0) {
self.to_read = self.dict.readFlush();
self.step = huffmanBlock;
self.step_state = .init;
return;
}
self.step_state = .init;
continue;
},
256 => {
self.finishBlock();
return;
},
257...264 => {
length = v - (257 - 3);
n = 0;
},
265...268 => {
length = v * 2 - (265 * 2 - 11);
n = 1;
},
269...272 => {
length = v * 4 - (269 * 4 - 19);
n = 2;
},
273...276 => {
length = v * 8 - (273 * 8 - 35);
n = 3;
},
277...280 => {
length = v * 16 - (277 * 16 - 67);
n = 4;
},
281...284 => {
length = v * 32 - (281 * 32 - 131);
n = 5;
},
max_num_lit - 1 => {
length = 258;
n = 0;
},
else => {
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
},
}
if (n > 0) {
while (self.nb < n) {
try self.moreBits();
}
length += @intCast(u32, self.b) & ((@as(u32, 1) << @intCast(u5, n)) - 1);
self.b >>= @intCast(u5, n);
self.nb -= n;
}
var dist: u32 = 0;
if (self.hd == null) {
while (self.nb < 5) {
try self.moreBits();
}
dist = @intCast(
u32,
bu.bitReverse(u8, @intCast(u8, (self.b & 0x1F) << 3), 8),
);
self.b >>= 5;
self.nb -= 5;
} else {
dist = try self.huffSym(self.hd.?);
}
switch (dist) {
0...3 => dist += 1,
4...max_num_dist - 1 => {
var nb = @intCast(u32, dist - 2) >> 1;
var extra = (dist & 1) << @intCast(u5, nb);
while (self.nb < nb) {
try self.moreBits();
}
extra |= @intCast(u32, self.b & (@as(u32, 1) << @intCast(u5, nb)) - 1);
self.b >>= @intCast(u5, nb);
self.nb -= nb;
dist = (@as(u32, 1) << @intCast(u5, nb + 1)) + 1 + extra;
},
else => {
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
},
}
if (dist > self.dict.histSize()) {
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
}
self.copy_len = length;
self.copy_dist = dist;
self.step_state = .dict;
},
.dict => {
var cnt = self.dict.tryWriteCopy(self.copy_dist, self.copy_len);
if (cnt == 0) {
cnt = self.dict.writeCopy(self.copy_dist, self.copy_len);
}
self.copy_len -= cnt;
if (self.dict.availWrite() == 0 or self.copy_len > 0) {
self.to_read = self.dict.readFlush();
self.step = huffmanBlock;
self.step_state = .dict;
return;
}
self.step_state = .init;
},
}
}
}
fn dataBlock(self: *Self) Error!void {
self.nb = 0;
self.b = 0;
var nr: u32 = 4;
self.inner_reader.readNoEof(self.buf[0..nr]) catch {
self.err = InflateError.UnexpectedEndOfStream;
return InflateError.UnexpectedEndOfStream;
};
self.roffset += @intCast(u64, nr);
var n = @intCast(u32, self.buf[0]) | @intCast(u32, self.buf[1]) << 8;
var nn = @intCast(u32, self.buf[2]) | @intCast(u32, self.buf[3]) << 8;
if (@intCast(u16, nn) != @truncate(u16, ~n)) {
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
}
if (n == 0) {
self.to_read = self.dict.readFlush();
self.finishBlock();
return;
}
self.copy_len = n;
try self.copyData();
}
fn copyData(self: *Self) Error!void {
var buf = self.dict.writeSlice();
if (buf.len > self.copy_len) {
buf = buf[0..self.copy_len];
}
var cnt = try self.inner_reader.read(buf);
if (cnt < buf.len) {
self.err = InflateError.UnexpectedEndOfStream;
}
self.roffset += @intCast(u64, cnt);
self.copy_len -= @intCast(u32, cnt);
self.dict.writeMark(@intCast(u32, cnt));
if (self.err != null) {
return InflateError.UnexpectedEndOfStream;
}
if (self.dict.availWrite() == 0 or self.copy_len > 0) {
self.to_read = self.dict.readFlush();
self.step = copyData;
return;
}
self.finishBlock();
}
fn finishBlock(self: *Self) void {
if (self.final) {
if (self.dict.availRead() > 0) {
self.to_read = self.dict.readFlush();
}
self.err = InflateError.EndOfStreamWithNoError;
}
self.step = nextBlock;
}
fn moreBits(self: *Self) InflateError!void {
var c = self.inner_reader.readByte() catch |e| {
if (e == error.EndOfStream) {
return InflateError.UnexpectedEndOfStream;
}
return InflateError.BadReaderState;
};
self.roffset += 1;
self.b |= @intCast(u32, c) << @intCast(u5, self.nb);
self.nb += 8;
return;
}
fn huffSym(self: *Self, h: *HuffmanDecoder) InflateError!u32 {
var n = @intCast(u32, h.min);
var nb = self.nb;
var b = self.b;
while (true) {
while (nb < n) {
var c = self.inner_reader.readByte() catch |e| {
self.b = b;
self.nb = nb;
if (e == error.EndOfStream) {
return error.UnexpectedEndOfStream;
}
return InflateError.BadReaderState;
};
self.roffset += 1;
b |= @intCast(u32, c) << @intCast(u5, nb & 31);
nb += 8;
}
var chunk = h.chunks[b & (huffman_num_chunks - 1)];
n = @intCast(u32, chunk & huffman_count_mask);
if (n > huffman_chunk_bits) {
chunk = h.links[chunk >> huffman_value_shift][(b >> huffman_chunk_bits) & h.link_mask];
n = @intCast(u32, chunk & huffman_count_mask);
}
if (n <= nb) {
if (n == 0) {
self.b = b;
self.nb = nb;
corrupt_input_error_offset = self.roffset;
self.err = InflateError.CorruptInput;
return InflateError.CorruptInput;
}
self.b = b >> @intCast(u5, n & 31);
self.nb = nb - n;
return @intCast(u32, chunk >> huffman_value_shift);
}
}
}
pub fn reset(s: *Self, new_reader: ReaderType, new_dict: ?[]const u8) !void {
s.inner_reader = new_reader;
s.step = nextBlock;
s.err = null;
s.dict.deinit();
try s.dict.init(s.allocator, max_match_offset, new_dict);
return;
}
};
}
const std = @import("std");
const io = std.io;
const math = std.math;
const mem = std.mem;
const assert = std.debug.assert;
const MAXBITS = 15;
const MAXLCODES = 286;
const MAXDCODES = 30;
const MAXCODES = MAXLCODES + MAXDCODES;
const FIXLCODES = 288;
const PREFIX_LUT_BITS = 9;
const Huffman = struct {
const LUTEntry = packed struct { symbol: u16 align(4), len: u16 };
count: [MAXBITS + 1]u16,
symbol: [MAXCODES]u16,
prefix_lut: [1 << PREFIX_LUT_BITS]LUTEntry,
last_code: u16,
last_index: u16,
min_code_len: u16,
const ConstructError = error{ Oversubscribed, IncompleteSet };
fn construct(self: *Huffman, code_length: []const u16) ConstructError!void {
for (self.count) |*val| {
val.* = 0;
}
self.min_code_len = math.maxInt(u16);
for (code_length) |len| {
if (len != 0 and len < self.min_code_len)
self.min_code_len = len;
self.count[len] += 1;
}
if (self.count[0] == code_length.len) {
self.min_code_len = 0;
return;
}
var left: isize = 1;
for (self.count[1..]) |val| {
left *= 2;
left -= @as(isize, @bitCast(i16, val));
if (left < 0)
return error.Oversubscribed;
}
var offset: [MAXBITS + 1]u16 = undefined;
var codes: [MAXBITS + 1]u16 = undefined;
{
offset[1] = 0;
codes[1] = 0;
var len: usize = 1;
while (len < MAXBITS) : (len += 1) {
offset[len + 1] = offset[len] + self.count[len];
codes[len + 1] = (codes[len] + self.count[len]) << 1;
}
}
self.prefix_lut = mem.zeroes(@TypeOf(self.prefix_lut));
for (code_length) |len, symbol| {
if (len != 0) {
self.symbol[offset[len]] = @truncate(u16, symbol);
offset[len] += 1;
}
if (len == 0 or len > PREFIX_LUT_BITS)
continue;
const bits_to_fill = @intCast(u5, PREFIX_LUT_BITS - len);
const rev_code = bitReverse(u16, codes[len], len);
codes[len] += 1;
var j: usize = 0;
while (j < @as(usize, 1) << bits_to_fill) : (j += 1) {
const index = rev_code | (j << @intCast(u5, len));
assert(self.prefix_lut[index].len == 0);
self.prefix_lut[index] = .{
.symbol = @truncate(u16, symbol),
.len = @truncate(u16, len),
};
}
}
self.last_code = codes[PREFIX_LUT_BITS + 1];
self.last_index = offset[PREFIX_LUT_BITS + 1] - self.count[PREFIX_LUT_BITS + 1];
if (left > 0)
return error.IncompleteSet;
}
};
fn bitReverse(comptime T: type, value: T, N: usize) T {
const r = @bitReverse(T, value);
return r >> @intCast(math.Log2Int(T), @typeInfo(T).Int.bits - N);
}
pub fn InflateStream(comptime ReaderType: type) type {
return struct {
const Self = @This();
pub const Error = ReaderType.Error || error{
EndOfStream,
BadCounts,
InvalidBlockType,
InvalidDistance,
InvalidFixedCode,
InvalidLength,
InvalidStoredSize,
InvalidSymbol,
InvalidTree,
MissingEOBCode,
NoLastLength,
OutOfCodes,
};
pub const Reader = io.Reader(*Self, Error, read);
inner_reader: ReaderType,
seen_eos: bool,
state: union(enum) {
DecodeBlockHeader: void,
DecodeBlockData: void,
Copy: usize,
CopyLit: u8,
CopyFrom: struct { distance: u16, length: u16 },
},
window: struct {
const WSelf = @This();
buf: []u8,
wi: usize = 0,
ri: usize = 0,
el: usize = 0,
total_written: usize = 0,
fn readable(self: *WSelf) usize {
return self.el;
}
fn writable(self: *WSelf) usize {
return self.buf.len - self.el;
}
fn append(self: *WSelf, value: u8) usize {
if (self.writable() < 1) return 0;
self.appendUnsafe(value);
return 1;
}
inline fn appendUnsafe(self: *WSelf, value: u8) void {
self.buf[self.wi] = value;
self.wi = (self.wi + 1) & (self.buf.len - 1);
self.el += 1;
self.total_written += 1;
}
fn read(self: *WSelf, dest: []u8) usize {
const N = math.min(dest.len, self.readable());
if (N == 0) return 0;
if (self.ri + N < self.buf.len) {
mem.copy(u8, dest, self.buf[self.ri .. self.ri + N]);
} else {
std.mem.copy(u8, dest, self.buf[self.ri..]);
const r = self.buf.len - self.ri;
std.mem.copy(u8, dest[r..], self.buf[0 .. N - r]);
}
self.ri = (self.ri + N) & (self.buf.len - 1);
self.el -= N;
return N;
}
fn copyFrom(self: *WSelf, distance: usize, length: usize) usize {
const N = math.min(length, self.writable());
if (N == 0) return 0;
var i: usize = 0;
while (i < N) : (i += 1) {
const index = (self.wi -% distance) & (self.buf.len - 1);
self.appendUnsafe(self.buf[index]);
}
return N;
}
},
huffman_tables: [2]Huffman = undefined,
hdist: *Huffman,
hlen: *Huffman,
fn peekBits(self: *Self, bits: usize) !u32 {
while (self.bits_left < bits) {
const byte = try self.inner_reader.readByte();
self.bits |= @as(u32, byte) << @intCast(u5, self.bits_left);
self.bits_left += 8;
}
const mask = (@as(u32, 1) << @intCast(u5, bits)) - 1;
return self.bits & mask;
}
fn readBits(self: *Self, bits: usize) !u32 {
const val = try self.peekBits(bits);
self.discardBits(bits);
return val;
}
fn discardBits(self: *Self, bits: usize) void {
self.bits >>= @intCast(u5, bits);
self.bits_left -= bits;
}
fn stored(self: *Self) !void {
self.discardBits(self.bits_left);
const length = try self.inner_reader.readIntLittle(u16);
const length_cpl = try self.inner_reader.readIntLittle(u16);
if (length != ~length_cpl)
return error.InvalidStoredSize;
self.state = .{ .Copy = length };
}
fn fixed(self: *Self) !void {
comptime var lencode: Huffman = undefined;
comptime var distcode: Huffman = undefined;
comptime {
@setEvalBranchQuota(100000);
const len_lengths =
[_]u16{8} ** 144 ++
[_]u16{9} ** 112 ++
[_]u16{7} ** 24 ++
[_]u16{8} ** 8;
assert(len_lengths.len == FIXLCODES);
try lencode.construct(len_lengths[0..]);
const dist_lengths = [_]u16{5} ** MAXDCODES;
distcode.construct(dist_lengths[0..]) catch |err| switch (err) {
error.IncompleteSet => {},
else => return err,
};
}
self.hlen = &lencode;
self.hdist = &distcode;
self.state = .DecodeBlockData;
}
fn dynamic(self: *Self) !void {
const nlen = (try self.readBits(5)) + 257;
const ndist = (try self.readBits(5)) + 1;
const ncode = (try self.readBits(4)) + 4;
if (nlen > MAXLCODES or ndist > MAXDCODES)
return error.BadCounts;
const ORDER = [19]u16{
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4,
12, 3, 13, 2, 14, 1, 15,
};
var lencode: Huffman = undefined;
{
var lengths = std.mem.zeroes([19]u16);
for (ORDER[0..ncode]) |val| {
lengths[val] = @intCast(u16, try self.readBits(3));
}
lencode.construct(lengths[0..]) catch return error.InvalidTree;
}
var lengths = std.mem.zeroes([MAXCODES]u16);
var i: usize = 0;
while (i < nlen + ndist) {
const symbol = try self.decode(&lencode);
switch (symbol) {
0...15 => {
lengths[i] = symbol;
i += 1;
},
16 => {
if (i == 0) return error.NoLastLength;
const last_length = lengths[i - 1];
const repeat = 3 + (try self.readBits(2));
const last_index = i + repeat;
if (last_index > lengths.len)
return error.InvalidLength;
while (i < last_index) : (i += 1) {
lengths[i] = last_length;
}
},
17 => {
i += 3 + (try self.readBits(3));
},
18 => {
i += 11 + (try self.readBits(7));
},
else => return error.InvalidSymbol,
}
}
if (i > nlen + ndist)
return error.InvalidLength;
if (lengths[256] == 0)
return error.MissingEOBCode;
self.huffman_tables[0].construct(lengths[0..nlen]) catch |err| switch (err) {
error.Oversubscribed => return error.InvalidTree,
error.IncompleteSet => {
if (nlen != self.huffman_tables[0].count[0] + self.huffman_tables[0].count[1]) {
return error.InvalidTree;
}
},
};
self.huffman_tables[1].construct(lengths[nlen .. nlen + ndist]) catch |err| switch (err) {
error.Oversubscribed => return error.InvalidTree,
error.IncompleteSet => {
if (ndist != self.huffman_tables[1].count[0] + self.huffman_tables[1].count[1]) {
return error.InvalidTree;
}
},
};
self.hlen = &self.huffman_tables[0];
self.hdist = &self.huffman_tables[1];
self.state = .DecodeBlockData;
}
fn codes(self: *Self, lencode: *Huffman, distcode: *Huffman) !bool {
const LENS = [29]u16{
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258,
};
const LEXT = [29]u16{
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0,
};
const DISTS = [30]u16{
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577,
};
const DEXT = [30]u16{
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
};
while (true) {
const symbol = try self.decode(lencode);
switch (symbol) {
0...255 => {
const c = @truncate(u8, symbol);
if (self.window.append(c) == 0) {
self.state = .{ .CopyLit = c };
return false;
}
},
256 => {
return true;
},
257...285 => {
const length_symbol = symbol - 257;
const length = LENS[length_symbol] +
@intCast(u16, try self.readBits(LEXT[length_symbol]));
const distance_symbol = try self.decode(distcode);
const distance = DISTS[distance_symbol] +
@intCast(u16, try self.readBits(DEXT[distance_symbol]));
if (distance > self.window.buf.len or distance > self.window.total_written)
return error.InvalidDistance;
const written = self.window.copyFrom(distance, length);
if (written != length) {
self.state = .{
.CopyFrom = .{
.distance = distance,
.length = length - @truncate(u16, written),
},
};
return false;
}
},
else => return error.InvalidFixedCode,
}
}
}
fn decode(self: *Self, h: *Huffman) !u16 {
var prefix: u32 = 0;
var code_len = h.min_code_len;
if (code_len == 0)
return error.OutOfCodes;
while (true) {
_ = try self.peekBits(code_len);
prefix = self.bits & ((1 << PREFIX_LUT_BITS) - 1);
const lut_entry = &h.prefix_lut[prefix];
if (lut_entry.len == 0)
break;
if (lut_entry.len <= code_len) {
self.discardBits(code_len);
return lut_entry.symbol;
}
code_len = lut_entry.len;
}
prefix = try self.readBits(PREFIX_LUT_BITS);
var len: usize = PREFIX_LUT_BITS + 1;
var first: usize = h.last_code;
var index: usize = h.last_index;
var code = bitReverse(u32, prefix, PREFIX_LUT_BITS + 1);
while (len <= MAXBITS) : (len += 1) {
code |= try self.readBits(1);
const count = h.count[len];
if (code < first + count) {
return h.symbol[index + (code - first)];
}
index += count;
first += count;
first <<= 1;
code <<= 1;
}
return error.OutOfCodes;
}
fn step(self: *Self) !void {
while (true) {
switch (self.state) {
.DecodeBlockHeader => {
if (self.seen_eos) return;
const last = @intCast(u1, try self.readBits(1));
const kind = @intCast(u2, try self.readBits(2));
self.seen_eos = last != 0;
switch (kind) {
0 => try self.stored(),
1 => try self.fixed(),
2 => try self.dynamic(),
3 => return error.InvalidBlockType,
}
},
.DecodeBlockData => {
if (!try self.codes(self.hlen, self.hdist)) {
return;
}
self.state = .DecodeBlockHeader;
},
.Copy => |*length| {
const N = math.min(self.window.writable(), length.*);
var i: usize = 0;
while (i < N) : (i += 1) {
var tmp: [1]u8 = undefined;
if ((try self.inner_reader.read(&tmp)) != 1) {
return error.EndOfStream;
}
self.window.appendUnsafe(tmp[0]);
}
if (N != length.*) {
length.* -= N;
return;
}
self.state = .DecodeBlockHeader;
},
.CopyLit => |c| {
if (self.window.append(c) == 0) {
return;
}
self.state = .DecodeBlockData;
},
.CopyFrom => |*info| {
const written = self.window.copyFrom(info.distance, info.length);
if (written != info.length) {
info.length -= @truncate(u16, written);
return;
}
self.state = .DecodeBlockData;
},
}
}
}
fn init(source: ReaderType, window_slice: []u8) Self {
assert(math.isPowerOfTwo(window_slice.len));
return Self{
.inner_reader = source,
.window = .{ .buf = window_slice },
.seen_eos = false,
.state = .DecodeBlockHeader,
.hdist = undefined,
.hlen = undefined,
.bits = 0,
.bits_left = 0,
};
}
pub fn read(self: *Self, buffer: []u8) Error!usize {
if (buffer.len == 0)
return 0;
var read_amt: usize = self.window.read(buffer);
while (read_amt < buffer.len) {
try self.step();
if (self.window.readable() == 0)
break;
read_amt += self.window.read(buffer[read_amt..]);
}
return read_amt;
}
pub fn reader(self: *Self) Reader {
return .{ .context = self };
}
};
}
pub fn inflateStream(reader: anytype, window_slice: []u8) InflateStream(@TypeOf(reader)) {
return InflateStream(@TypeOf(reader)).init(reader, window_slice);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment