Last active
January 13, 2022 06:51
-
-
Save hdorio/17332012563cfa022622d26ef63b095b to your computer and use it in GitHub Desktop.
number of lines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
python -c "print($(grep -v "}\$" fast.zig | wc -l) - $(grep -v "}\$" small.zig | wc -l))" | |
// 259 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const io = std.io; | |
const math = std.math; | |
const mem = std.mem; | |
const Allocator = std.mem.Allocator; | |
const ArrayList = std.ArrayList; | |
const bu = struct { | |
pub fn bitReverse(comptime T: type, value: T, N: usize) T { | |
const r = @bitReverse(T, value); | |
return r >> @intCast(math.Log2Int(T), @typeInfo(T).Int.bits - N); | |
} | |
}; | |
const ddec = struct { | |
pub const DictDecoder = struct { | |
const Self = @This(); | |
allocator: Allocator = undefined, | |
hist: []u8 = undefined, // Sliding window history | |
wr_pos: u32 = 0, // Current output position in buffer | |
rd_pos: u32 = 0, // Have emitted hist[0..rd_pos] already | |
full: bool = false, // Has a full window length been written yet? | |
pub fn init(self: *Self, allocator: Allocator, size: u32, dict: ?[]const u8) !void { | |
self.allocator = allocator; | |
self.hist = try allocator.alloc(u8, size); | |
self.wr_pos = 0; | |
if (dict != null) { | |
mem.copy(u8, self.hist, dict.?[dict.?.len -| self.hist.len..]); | |
self.wr_pos = @intCast(u32, dict.?.len); | |
} | |
if (self.wr_pos == self.hist.len) { | |
self.wr_pos = 0; | |
self.full = true; | |
} | |
self.rd_pos = self.wr_pos; | |
} | |
pub fn deinit(self: *Self) void { | |
self.allocator.free(self.hist); | |
} | |
pub fn histSize(self: *Self) u32 { | |
if (self.full) { | |
return @intCast(u32, self.hist.len); | |
} | |
return self.wr_pos; | |
} | |
pub fn availRead(self: *Self) u32 { | |
return self.wr_pos - self.rd_pos; | |
} | |
pub fn availWrite(self: *Self) u32 { | |
return @intCast(u32, self.hist.len - self.wr_pos); | |
} | |
pub fn writeSlice(self: *Self) []u8 { | |
return self.hist[self.wr_pos..]; | |
} | |
pub fn writeMark(self: *Self, count: u32) void { | |
self.wr_pos += count; | |
} | |
pub fn writeByte(self: *Self, byte: u8) void { | |
self.hist[self.wr_pos] = byte; | |
self.wr_pos += 1; | |
} | |
fn copy(dst: []u8, src: []const u8) u32 { | |
if (src.len > dst.len) { | |
mem.copy(u8, dst, src[0..dst.len]); | |
return @intCast(u32, dst.len); | |
} | |
mem.copy(u8, dst, src); | |
return @intCast(u32, src.len); | |
} | |
pub fn writeCopy(self: *Self, dist: u32, length: u32) u32 { | |
var dst_base = self.wr_pos; | |
var dst_pos = dst_base; | |
var src_pos: i32 = @intCast(i32, dst_pos) - @intCast(i32, dist); | |
var end_pos = dst_pos + length; | |
if (end_pos > self.hist.len) { | |
end_pos = @intCast(u32, self.hist.len); | |
} | |
if (src_pos < 0) { | |
src_pos += @intCast(i32, self.hist.len); | |
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@intCast(usize, src_pos)..]); | |
src_pos = 0; | |
} | |
while (dst_pos < end_pos) { | |
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@intCast(usize, src_pos)..dst_pos]); | |
} | |
self.wr_pos = dst_pos; | |
return dst_pos - dst_base; | |
} | |
pub fn tryWriteCopy(self: *Self, dist: u32, length: u32) u32 { | |
var dst_pos = self.wr_pos; | |
var end_pos = dst_pos + length; | |
if (dst_pos < dist or end_pos > self.hist.len) { | |
return 0; | |
} | |
var dst_base = dst_pos; | |
var src_pos = dst_pos - dist; | |
while (dst_pos < end_pos) { | |
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[src_pos..dst_pos]); | |
} | |
self.wr_pos = dst_pos; | |
return dst_pos - dst_base; | |
} | |
pub fn readFlush(self: *Self) []u8 { | |
var to_read = self.hist[self.rd_pos..self.wr_pos]; | |
self.rd_pos = self.wr_pos; | |
if (self.wr_pos == self.hist.len) { | |
self.wr_pos = 0; | |
self.rd_pos = 0; | |
self.full = true; | |
} | |
return to_read; | |
} | |
}; | |
}; | |
const deflate_const = struct { | |
pub const max_store_block_size = 65535; | |
pub const end_block_marker = 256; | |
pub const base_match_length = 3; | |
pub const base_match_offset = 1; | |
pub const max_match_length = 258; | |
pub const max_match_offset = 1 << 15; | |
pub const offset_code_count = 30; | |
pub const max_num_frequencies = max_num_lit; | |
}; | |
const mu = struct { | |
pub fn copy(dst: []u8, src: []const u8) usize { | |
if (dst.len <= src.len) { | |
mem.copy(u8, dst[0..], src[0..dst.len]); | |
} else { | |
mem.copy(u8, dst[0..src.len], src[0..]); | |
} | |
return math.min(dst.len, src.len); | |
} | |
}; | |
const max_match_offset = deflate_const.max_match_offset; | |
const end_block_marker = deflate_const.end_block_marker; | |
const max_code_len = 16; | |
const max_num_lit = 286; | |
const max_num_dist = 30; | |
const num_codes = 19; | |
var corrupt_input_error_offset: u64 = undefined; | |
const InflateError = error{ | |
CorruptInput, | |
BadInternalState, | |
BadReaderState, | |
UnexpectedEndOfStream, | |
EndOfStreamWithNoError, | |
}; | |
const huffman_chunk_bits = 9; | |
const huffman_num_chunks = 1 << huffman_chunk_bits; | |
const huffman_count_mask = 15; | |
const huffman_value_shift = 4; | |
const HuffmanDecoder = struct { | |
const Self = @This(); | |
allocator: Allocator = undefined, | |
min: u32 = 0, | |
chunks: [huffman_num_chunks]u32 = [1]u32{0} ** huffman_num_chunks, | |
links: [][]u32 = undefined, | |
link_mask: u32 = 0, | |
initialized: bool = false, | |
sub_links: ArrayList(u32) = undefined, | |
fn init(self: *Self, allocator: Allocator, lengths: []u32) !bool { | |
const sanity = false; | |
self.deinit(); | |
if (self.min != 0) { | |
self.* = HuffmanDecoder{}; | |
} | |
self.allocator = allocator; | |
var count: [max_code_len]u32 = [1]u32{0} ** max_code_len; | |
var min: u32 = 0; | |
var max: u32 = 0; | |
for (lengths) |n| { | |
if (n == 0) { | |
continue; | |
} | |
if (min == 0 or n < min) { | |
min = n; | |
} | |
if (n > max) { | |
max = n; | |
} | |
count[n] += 1; | |
} | |
if (max == 0) { | |
return true; | |
} | |
var next_code: [max_code_len]u32 = [1]u32{0} ** max_code_len; | |
var code: u32 = 0; | |
{ | |
var i = min; | |
while (i <= max) : (i += 1) { | |
code <<= 1; | |
next_code[i] = code; | |
code += count[i]; | |
} | |
} | |
if (code != @as(u32, 1) << @intCast(u5, max) and !(code == 1 and max == 1)) { | |
return false; | |
} | |
self.min = min; | |
if (max > huffman_chunk_bits) { | |
var numLinks = @as(u32, 1) << @intCast(u5, max - huffman_chunk_bits); | |
self.link_mask = @intCast(u32, numLinks - 1); | |
var link = next_code[huffman_chunk_bits + 1] >> 1; | |
self.links = try self.allocator.alloc([]u32, huffman_num_chunks - link); | |
self.sub_links = ArrayList(u32).init(self.allocator); | |
self.initialized = true; | |
var j = @intCast(u32, link); | |
while (j < huffman_num_chunks) : (j += 1) { | |
var reverse = @intCast(u32, bu.bitReverse(u16, @intCast(u16, j), 16)); | |
reverse >>= @intCast(u32, 16 - huffman_chunk_bits); | |
var off = j - @intCast(u32, link); | |
if (sanity and self.chunks[reverse] != 0) { | |
unreachable; | |
} | |
self.chunks[reverse] = @intCast(u32, off << huffman_value_shift | (huffman_chunk_bits + 1)); | |
self.links[off] = try self.allocator.alloc(u32, numLinks); | |
try self.sub_links.append(off); | |
} | |
} | |
for (lengths) |n, li| { | |
if (n == 0) { | |
continue; | |
} | |
var ncode = next_code[n]; | |
next_code[n] += 1; | |
var chunk = @intCast(u32, (li << huffman_value_shift) | n); | |
var reverse = @intCast(u32, bu.bitReverse(u16, @intCast(u16, ncode), 16)); | |
reverse >>= @intCast(u5, 16 - n); | |
if (n <= huffman_chunk_bits) { | |
var off = reverse; | |
while (off < self.chunks.len) : (off += @as(u32, 1) << @intCast(u5, n)) { | |
if (sanity and self.chunks[off] != 0) { | |
unreachable; | |
} | |
self.chunks[off] = chunk; | |
} | |
} else { | |
var j = reverse & (huffman_num_chunks - 1); | |
if (sanity and self.chunks[j] & huffman_count_mask != huffman_chunk_bits + 1) { | |
unreachable; | |
} | |
var value = self.chunks[j] >> huffman_value_shift; | |
var link_tab = self.links[value]; | |
reverse >>= huffman_chunk_bits; | |
var off = reverse; | |
while (off < link_tab.len) : (off += @as(u32, 1) << @intCast(u5, n - huffman_chunk_bits)) { | |
if (sanity and link_tab[off] != 0) { | |
unreachable; | |
} | |
link_tab[off] = chunk; | |
} | |
} | |
} | |
if (sanity) { | |
for (self.chunks) |chunk, i| { | |
if (chunk == 0) { | |
if (code == 1 and i % 2 == 1) { | |
continue; | |
} | |
unreachable; | |
} | |
} | |
for (self.links) |link_tab| { | |
for (link_tab) |chunk| { | |
if (chunk == 0) { | |
unreachable; | |
} | |
} | |
} | |
} | |
return true; | |
} | |
pub fn deinit(self: *Self) void { | |
if (self.initialized and self.links.len > 0) { | |
for (self.sub_links.items) |off| { | |
self.allocator.free(self.links[off]); | |
} | |
self.allocator.free(self.links); | |
self.sub_links.deinit(); | |
} | |
} | |
}; | |
var fixed_huffman_decoder: ?HuffmanDecoder = null; | |
fn fixedHuffmanDecoderInit(allocator: Allocator) !HuffmanDecoder { | |
if (fixed_huffman_decoder != null) { | |
return fixed_huffman_decoder.?; | |
} | |
var bits: [288]u32 = undefined; | |
var i: u32 = 0; | |
while (i < 144) : (i += 1) { | |
bits[i] = 8; | |
} | |
while (i < 256) : (i += 1) { | |
bits[i] = 9; | |
} | |
while (i < 280) : (i += 1) { | |
bits[i] = 7; | |
} | |
while (i < 288) : (i += 1) { | |
bits[i] = 8; | |
} | |
fixed_huffman_decoder = HuffmanDecoder{}; | |
_ = try fixed_huffman_decoder.?.init(allocator, &bits); | |
return fixed_huffman_decoder.?; | |
} | |
const DecompressorState = enum { | |
init, | |
dict, | |
}; | |
pub fn decompressor(allocator: Allocator, reader: anytype, dictionary: ?[]const u8) !Decompressor(@TypeOf(reader)) { | |
return Decompressor(@TypeOf(reader)).init(allocator, reader, dictionary); | |
} | |
pub fn Decompressor(comptime ReaderType: type) type { | |
return struct { | |
const Self = @This(); | |
pub const Error = | |
ReaderType.Error || | |
error{EndOfStream} || | |
InflateError || | |
Allocator.Error; | |
pub const Reader = io.Reader(*Self, Error, read); | |
allocator: Allocator, | |
inner_reader: ReaderType, | |
roffset: u64, | |
b: u32, | |
nb: u32, | |
hd1: HuffmanDecoder, | |
hd2: HuffmanDecoder, | |
bits: *[max_num_lit + max_num_dist]u32, | |
codebits: *[num_codes]u32, | |
dict: ddec.DictDecoder, | |
buf: [4]u8, | |
step: fn (*Self) Error!void, | |
step_state: DecompressorState, | |
final: bool, | |
err: ?InflateError, | |
to_read: []u8, | |
hl: ?*HuffmanDecoder, | |
hd: ?*HuffmanDecoder, | |
copy_len: u32, | |
copy_dist: u32, | |
pub fn reader(self: *Self) Reader { | |
return .{ .context = self }; | |
} | |
fn init(allocator: Allocator, in_reader: ReaderType, dict: ?[]const u8) !Self { | |
var dd = ddec.DictDecoder{}; | |
try dd.init(allocator, max_match_offset, dict); | |
fixed_huffman_decoder = try fixedHuffmanDecoderInit(allocator); | |
return Self{ | |
.allocator = allocator, | |
.inner_reader = in_reader, | |
.roffset = 0, | |
.b = 0, | |
.nb = 0, | |
.hd1 = HuffmanDecoder{}, | |
.hd2 = HuffmanDecoder{}, | |
.bits = try allocator.create([max_num_lit + max_num_dist]u32), | |
.codebits = try allocator.create([num_codes]u32), | |
.dict = dd, | |
.buf = [_]u8{0} ** 4, | |
.step = nextBlock, | |
.step_state = .init, | |
.final = false, | |
.err = null, | |
.to_read = &[0]u8{}, | |
.hl = null, | |
.hd = null, | |
.copy_len = 0, | |
.copy_dist = 0, | |
}; | |
} | |
pub fn deinit(self: *Self) void { | |
self.hd1.deinit(); | |
self.hd2.deinit(); | |
self.dict.deinit(); | |
self.allocator.destroy(self.bits); | |
self.allocator.destroy(self.codebits); | |
} | |
fn nextBlock(self: *Self) Error!void { | |
while (self.nb < 1 + 2) { | |
self.moreBits() catch |e| { | |
self.err = e; | |
return e; | |
}; | |
} | |
self.final = self.b & 1 == 1; | |
self.b >>= 1; | |
var typ = self.b & 3; | |
self.b >>= 2; | |
self.nb -= 1 + 2; | |
switch (typ) { | |
0 => try self.dataBlock(), | |
1 => { | |
self.hl = &fixed_huffman_decoder.?; | |
self.hd = null; | |
try self.huffmanBlock(); | |
}, | |
2 => { | |
try self.readHuffman(); | |
self.hl = &self.hd1; | |
self.hd = &self.hd2; | |
try self.huffmanBlock(); | |
}, | |
else => { | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
}, | |
} | |
} | |
pub fn read(self: *Self, b: []u8) Error!usize { | |
while (true) { | |
if (self.to_read.len > 0) { | |
var n = mu.copy(b, self.to_read); | |
self.to_read = self.to_read[n..]; | |
if (self.to_read.len == 0 and | |
self.err != null) | |
{ | |
if (self.err.? == InflateError.EndOfStreamWithNoError) { | |
return n; | |
} | |
return self.err.?; | |
} | |
return n; | |
} | |
if (self.err != null) { | |
if (self.err.? == InflateError.EndOfStreamWithNoError) { | |
return 0; | |
} | |
return self.err.?; | |
} | |
self.step(self) catch { | |
if (self.to_read.len == 0) { | |
self.to_read = self.dict.readFlush(); | |
} | |
}; | |
} | |
} | |
pub fn close(self: *Self) ?Error { | |
if (self.err == InflateError.EndOfStreamWithNoError) { | |
return null; | |
} | |
return self.err; | |
} | |
const code_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; | |
fn readHuffman(self: *Self) Error!void { | |
while (self.nb < 5 + 5 + 4) { | |
try self.moreBits(); | |
} | |
var nlit = @intCast(u32, self.b & 0x1F) + 257; | |
if (nlit > max_num_lit) { | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
} | |
self.b >>= 5; | |
var ndist = @intCast(u32, self.b & 0x1F) + 1; | |
if (ndist > max_num_dist) { | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
} | |
self.b >>= 5; | |
var nclen = @intCast(u32, self.b & 0xF) + 4; | |
self.b >>= 4; | |
self.nb -= 5 + 5 + 4; | |
var i: u32 = 0; | |
while (i < nclen) : (i += 1) { | |
while (self.nb < 3) { | |
try self.moreBits(); | |
} | |
self.codebits[code_order[i]] = @intCast(u32, self.b & 0x7); | |
self.b >>= 3; | |
self.nb -= 3; | |
} | |
i = nclen; | |
while (i < code_order.len) : (i += 1) { | |
self.codebits[code_order[i]] = 0; | |
} | |
if (!try self.hd1.init(self.allocator, self.codebits[0..])) { | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
} | |
i = 0; | |
var n = nlit + ndist; | |
while (i < n) { | |
var x = try self.huffSym(&self.hd1); | |
if (x < 16) { | |
self.bits[i] = x; | |
i += 1; | |
continue; | |
} | |
var rep: u32 = 0; | |
var nb: u32 = 0; | |
var b: u32 = 0; | |
switch (x) { | |
16 => { | |
rep = 3; | |
nb = 2; | |
if (i == 0) { | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
} | |
b = self.bits[i - 1]; | |
}, | |
17 => { | |
rep = 3; | |
nb = 3; | |
b = 0; | |
}, | |
18 => { | |
rep = 11; | |
nb = 7; | |
b = 0; | |
}, | |
else => return error.BadInternalState, | |
} | |
while (self.nb < nb) { | |
try self.moreBits(); | |
} | |
rep += @intCast(u32, self.b & (@as(u32, 1) << @intCast(u5, nb)) - 1); | |
self.b >>= @intCast(u5, nb); | |
self.nb -= nb; | |
if (i + rep > n) { | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
} | |
var j: u32 = 0; | |
while (j < rep) : (j += 1) { | |
self.bits[i] = b; | |
i += 1; | |
} | |
} | |
if (!try self.hd1.init(self.allocator, self.bits[0..nlit]) or | |
!try self.hd2.init(self.allocator, self.bits[nlit .. nlit + ndist])) | |
{ | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
} | |
if (self.hd1.min < self.bits[end_block_marker]) { | |
self.hd1.min = self.bits[end_block_marker]; | |
} | |
return; | |
} | |
fn huffmanBlock(self: *Self) Error!void { | |
while (true) { | |
switch (self.step_state) { | |
.init => { | |
var v = try self.huffSym(self.hl.?); | |
var n: u32 = 0; | |
var length: u32 = 0; | |
switch (v) { | |
0...255 => { | |
self.dict.writeByte(@intCast(u8, v)); | |
if (self.dict.availWrite() == 0) { | |
self.to_read = self.dict.readFlush(); | |
self.step = huffmanBlock; | |
self.step_state = .init; | |
return; | |
} | |
self.step_state = .init; | |
continue; | |
}, | |
256 => { | |
self.finishBlock(); | |
return; | |
}, | |
257...264 => { | |
length = v - (257 - 3); | |
n = 0; | |
}, | |
265...268 => { | |
length = v * 2 - (265 * 2 - 11); | |
n = 1; | |
}, | |
269...272 => { | |
length = v * 4 - (269 * 4 - 19); | |
n = 2; | |
}, | |
273...276 => { | |
length = v * 8 - (273 * 8 - 35); | |
n = 3; | |
}, | |
277...280 => { | |
length = v * 16 - (277 * 16 - 67); | |
n = 4; | |
}, | |
281...284 => { | |
length = v * 32 - (281 * 32 - 131); | |
n = 5; | |
}, | |
max_num_lit - 1 => { | |
length = 258; | |
n = 0; | |
}, | |
else => { | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
}, | |
} | |
if (n > 0) { | |
while (self.nb < n) { | |
try self.moreBits(); | |
} | |
length += @intCast(u32, self.b) & ((@as(u32, 1) << @intCast(u5, n)) - 1); | |
self.b >>= @intCast(u5, n); | |
self.nb -= n; | |
} | |
var dist: u32 = 0; | |
if (self.hd == null) { | |
while (self.nb < 5) { | |
try self.moreBits(); | |
} | |
dist = @intCast( | |
u32, | |
bu.bitReverse(u8, @intCast(u8, (self.b & 0x1F) << 3), 8), | |
); | |
self.b >>= 5; | |
self.nb -= 5; | |
} else { | |
dist = try self.huffSym(self.hd.?); | |
} | |
switch (dist) { | |
0...3 => dist += 1, | |
4...max_num_dist - 1 => { | |
var nb = @intCast(u32, dist - 2) >> 1; | |
var extra = (dist & 1) << @intCast(u5, nb); | |
while (self.nb < nb) { | |
try self.moreBits(); | |
} | |
extra |= @intCast(u32, self.b & (@as(u32, 1) << @intCast(u5, nb)) - 1); | |
self.b >>= @intCast(u5, nb); | |
self.nb -= nb; | |
dist = (@as(u32, 1) << @intCast(u5, nb + 1)) + 1 + extra; | |
}, | |
else => { | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
}, | |
} | |
if (dist > self.dict.histSize()) { | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
} | |
self.copy_len = length; | |
self.copy_dist = dist; | |
self.step_state = .dict; | |
}, | |
.dict => { | |
var cnt = self.dict.tryWriteCopy(self.copy_dist, self.copy_len); | |
if (cnt == 0) { | |
cnt = self.dict.writeCopy(self.copy_dist, self.copy_len); | |
} | |
self.copy_len -= cnt; | |
if (self.dict.availWrite() == 0 or self.copy_len > 0) { | |
self.to_read = self.dict.readFlush(); | |
self.step = huffmanBlock; | |
self.step_state = .dict; | |
return; | |
} | |
self.step_state = .init; | |
}, | |
} | |
} | |
} | |
fn dataBlock(self: *Self) Error!void { | |
self.nb = 0; | |
self.b = 0; | |
var nr: u32 = 4; | |
self.inner_reader.readNoEof(self.buf[0..nr]) catch { | |
self.err = InflateError.UnexpectedEndOfStream; | |
return InflateError.UnexpectedEndOfStream; | |
}; | |
self.roffset += @intCast(u64, nr); | |
var n = @intCast(u32, self.buf[0]) | @intCast(u32, self.buf[1]) << 8; | |
var nn = @intCast(u32, self.buf[2]) | @intCast(u32, self.buf[3]) << 8; | |
if (@intCast(u16, nn) != @truncate(u16, ~n)) { | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
} | |
if (n == 0) { | |
self.to_read = self.dict.readFlush(); | |
self.finishBlock(); | |
return; | |
} | |
self.copy_len = n; | |
try self.copyData(); | |
} | |
fn copyData(self: *Self) Error!void { | |
var buf = self.dict.writeSlice(); | |
if (buf.len > self.copy_len) { | |
buf = buf[0..self.copy_len]; | |
} | |
var cnt = try self.inner_reader.read(buf); | |
if (cnt < buf.len) { | |
self.err = InflateError.UnexpectedEndOfStream; | |
} | |
self.roffset += @intCast(u64, cnt); | |
self.copy_len -= @intCast(u32, cnt); | |
self.dict.writeMark(@intCast(u32, cnt)); | |
if (self.err != null) { | |
return InflateError.UnexpectedEndOfStream; | |
} | |
if (self.dict.availWrite() == 0 or self.copy_len > 0) { | |
self.to_read = self.dict.readFlush(); | |
self.step = copyData; | |
return; | |
} | |
self.finishBlock(); | |
} | |
fn finishBlock(self: *Self) void { | |
if (self.final) { | |
if (self.dict.availRead() > 0) { | |
self.to_read = self.dict.readFlush(); | |
} | |
self.err = InflateError.EndOfStreamWithNoError; | |
} | |
self.step = nextBlock; | |
} | |
fn moreBits(self: *Self) InflateError!void { | |
var c = self.inner_reader.readByte() catch |e| { | |
if (e == error.EndOfStream) { | |
return InflateError.UnexpectedEndOfStream; | |
} | |
return InflateError.BadReaderState; | |
}; | |
self.roffset += 1; | |
self.b |= @intCast(u32, c) << @intCast(u5, self.nb); | |
self.nb += 8; | |
return; | |
} | |
fn huffSym(self: *Self, h: *HuffmanDecoder) InflateError!u32 { | |
var n = @intCast(u32, h.min); | |
var nb = self.nb; | |
var b = self.b; | |
while (true) { | |
while (nb < n) { | |
var c = self.inner_reader.readByte() catch |e| { | |
self.b = b; | |
self.nb = nb; | |
if (e == error.EndOfStream) { | |
return error.UnexpectedEndOfStream; | |
} | |
return InflateError.BadReaderState; | |
}; | |
self.roffset += 1; | |
b |= @intCast(u32, c) << @intCast(u5, nb & 31); | |
nb += 8; | |
} | |
var chunk = h.chunks[b & (huffman_num_chunks - 1)]; | |
n = @intCast(u32, chunk & huffman_count_mask); | |
if (n > huffman_chunk_bits) { | |
chunk = h.links[chunk >> huffman_value_shift][(b >> huffman_chunk_bits) & h.link_mask]; | |
n = @intCast(u32, chunk & huffman_count_mask); | |
} | |
if (n <= nb) { | |
if (n == 0) { | |
self.b = b; | |
self.nb = nb; | |
corrupt_input_error_offset = self.roffset; | |
self.err = InflateError.CorruptInput; | |
return InflateError.CorruptInput; | |
} | |
self.b = b >> @intCast(u5, n & 31); | |
self.nb = nb - n; | |
return @intCast(u32, chunk >> huffman_value_shift); | |
} | |
} | |
} | |
pub fn reset(s: *Self, new_reader: ReaderType, new_dict: ?[]const u8) !void { | |
s.inner_reader = new_reader; | |
s.step = nextBlock; | |
s.err = null; | |
s.dict.deinit(); | |
try s.dict.init(s.allocator, max_match_offset, new_dict); | |
return; | |
} | |
}; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const io = std.io; | |
const math = std.math; | |
const mem = std.mem; | |
const assert = std.debug.assert; | |
const MAXBITS = 15; | |
const MAXLCODES = 286; | |
const MAXDCODES = 30; | |
const MAXCODES = MAXLCODES + MAXDCODES; | |
const FIXLCODES = 288; | |
const PREFIX_LUT_BITS = 9; | |
const Huffman = struct { | |
const LUTEntry = packed struct { symbol: u16 align(4), len: u16 }; | |
count: [MAXBITS + 1]u16, | |
symbol: [MAXCODES]u16, | |
prefix_lut: [1 << PREFIX_LUT_BITS]LUTEntry, | |
last_code: u16, | |
last_index: u16, | |
min_code_len: u16, | |
const ConstructError = error{ Oversubscribed, IncompleteSet }; | |
fn construct(self: *Huffman, code_length: []const u16) ConstructError!void { | |
for (self.count) |*val| { | |
val.* = 0; | |
} | |
self.min_code_len = math.maxInt(u16); | |
for (code_length) |len| { | |
if (len != 0 and len < self.min_code_len) | |
self.min_code_len = len; | |
self.count[len] += 1; | |
} | |
if (self.count[0] == code_length.len) { | |
self.min_code_len = 0; | |
return; | |
} | |
var left: isize = 1; | |
for (self.count[1..]) |val| { | |
left *= 2; | |
left -= @as(isize, @bitCast(i16, val)); | |
if (left < 0) | |
return error.Oversubscribed; | |
} | |
var offset: [MAXBITS + 1]u16 = undefined; | |
var codes: [MAXBITS + 1]u16 = undefined; | |
{ | |
offset[1] = 0; | |
codes[1] = 0; | |
var len: usize = 1; | |
while (len < MAXBITS) : (len += 1) { | |
offset[len + 1] = offset[len] + self.count[len]; | |
codes[len + 1] = (codes[len] + self.count[len]) << 1; | |
} | |
} | |
self.prefix_lut = mem.zeroes(@TypeOf(self.prefix_lut)); | |
for (code_length) |len, symbol| { | |
if (len != 0) { | |
self.symbol[offset[len]] = @truncate(u16, symbol); | |
offset[len] += 1; | |
} | |
if (len == 0 or len > PREFIX_LUT_BITS) | |
continue; | |
const bits_to_fill = @intCast(u5, PREFIX_LUT_BITS - len); | |
const rev_code = bitReverse(u16, codes[len], len); | |
codes[len] += 1; | |
var j: usize = 0; | |
while (j < @as(usize, 1) << bits_to_fill) : (j += 1) { | |
const index = rev_code | (j << @intCast(u5, len)); | |
assert(self.prefix_lut[index].len == 0); | |
self.prefix_lut[index] = .{ | |
.symbol = @truncate(u16, symbol), | |
.len = @truncate(u16, len), | |
}; | |
} | |
} | |
self.last_code = codes[PREFIX_LUT_BITS + 1]; | |
self.last_index = offset[PREFIX_LUT_BITS + 1] - self.count[PREFIX_LUT_BITS + 1]; | |
if (left > 0) | |
return error.IncompleteSet; | |
} | |
}; | |
fn bitReverse(comptime T: type, value: T, N: usize) T { | |
const r = @bitReverse(T, value); | |
return r >> @intCast(math.Log2Int(T), @typeInfo(T).Int.bits - N); | |
} | |
pub fn InflateStream(comptime ReaderType: type) type { | |
return struct { | |
const Self = @This(); | |
pub const Error = ReaderType.Error || error{ | |
EndOfStream, | |
BadCounts, | |
InvalidBlockType, | |
InvalidDistance, | |
InvalidFixedCode, | |
InvalidLength, | |
InvalidStoredSize, | |
InvalidSymbol, | |
InvalidTree, | |
MissingEOBCode, | |
NoLastLength, | |
OutOfCodes, | |
}; | |
pub const Reader = io.Reader(*Self, Error, read); | |
inner_reader: ReaderType, | |
seen_eos: bool, | |
state: union(enum) { | |
DecodeBlockHeader: void, | |
DecodeBlockData: void, | |
Copy: usize, | |
CopyLit: u8, | |
CopyFrom: struct { distance: u16, length: u16 }, | |
}, | |
window: struct { | |
const WSelf = @This(); | |
buf: []u8, | |
wi: usize = 0, | |
ri: usize = 0, | |
el: usize = 0, | |
total_written: usize = 0, | |
fn readable(self: *WSelf) usize { | |
return self.el; | |
} | |
fn writable(self: *WSelf) usize { | |
return self.buf.len - self.el; | |
} | |
fn append(self: *WSelf, value: u8) usize { | |
if (self.writable() < 1) return 0; | |
self.appendUnsafe(value); | |
return 1; | |
} | |
inline fn appendUnsafe(self: *WSelf, value: u8) void { | |
self.buf[self.wi] = value; | |
self.wi = (self.wi + 1) & (self.buf.len - 1); | |
self.el += 1; | |
self.total_written += 1; | |
} | |
fn read(self: *WSelf, dest: []u8) usize { | |
const N = math.min(dest.len, self.readable()); | |
if (N == 0) return 0; | |
if (self.ri + N < self.buf.len) { | |
mem.copy(u8, dest, self.buf[self.ri .. self.ri + N]); | |
} else { | |
std.mem.copy(u8, dest, self.buf[self.ri..]); | |
const r = self.buf.len - self.ri; | |
std.mem.copy(u8, dest[r..], self.buf[0 .. N - r]); | |
} | |
self.ri = (self.ri + N) & (self.buf.len - 1); | |
self.el -= N; | |
return N; | |
} | |
fn copyFrom(self: *WSelf, distance: usize, length: usize) usize { | |
const N = math.min(length, self.writable()); | |
if (N == 0) return 0; | |
var i: usize = 0; | |
while (i < N) : (i += 1) { | |
const index = (self.wi -% distance) & (self.buf.len - 1); | |
self.appendUnsafe(self.buf[index]); | |
} | |
return N; | |
} | |
}, | |
huffman_tables: [2]Huffman = undefined, | |
hdist: *Huffman, | |
hlen: *Huffman, | |
fn peekBits(self: *Self, bits: usize) !u32 { | |
while (self.bits_left < bits) { | |
const byte = try self.inner_reader.readByte(); | |
self.bits |= @as(u32, byte) << @intCast(u5, self.bits_left); | |
self.bits_left += 8; | |
} | |
const mask = (@as(u32, 1) << @intCast(u5, bits)) - 1; | |
return self.bits & mask; | |
} | |
fn readBits(self: *Self, bits: usize) !u32 { | |
const val = try self.peekBits(bits); | |
self.discardBits(bits); | |
return val; | |
} | |
fn discardBits(self: *Self, bits: usize) void { | |
self.bits >>= @intCast(u5, bits); | |
self.bits_left -= bits; | |
} | |
fn stored(self: *Self) !void { | |
self.discardBits(self.bits_left); | |
const length = try self.inner_reader.readIntLittle(u16); | |
const length_cpl = try self.inner_reader.readIntLittle(u16); | |
if (length != ~length_cpl) | |
return error.InvalidStoredSize; | |
self.state = .{ .Copy = length }; | |
} | |
fn fixed(self: *Self) !void { | |
comptime var lencode: Huffman = undefined; | |
comptime var distcode: Huffman = undefined; | |
comptime { | |
@setEvalBranchQuota(100000); | |
const len_lengths = | |
[_]u16{8} ** 144 ++ | |
[_]u16{9} ** 112 ++ | |
[_]u16{7} ** 24 ++ | |
[_]u16{8} ** 8; | |
assert(len_lengths.len == FIXLCODES); | |
try lencode.construct(len_lengths[0..]); | |
const dist_lengths = [_]u16{5} ** MAXDCODES; | |
distcode.construct(dist_lengths[0..]) catch |err| switch (err) { | |
error.IncompleteSet => {}, | |
else => return err, | |
}; | |
} | |
self.hlen = &lencode; | |
self.hdist = &distcode; | |
self.state = .DecodeBlockData; | |
} | |
fn dynamic(self: *Self) !void { | |
const nlen = (try self.readBits(5)) + 257; | |
const ndist = (try self.readBits(5)) + 1; | |
const ncode = (try self.readBits(4)) + 4; | |
if (nlen > MAXLCODES or ndist > MAXDCODES) | |
return error.BadCounts; | |
const ORDER = [19]u16{ | |
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, | |
12, 3, 13, 2, 14, 1, 15, | |
}; | |
var lencode: Huffman = undefined; | |
{ | |
var lengths = std.mem.zeroes([19]u16); | |
for (ORDER[0..ncode]) |val| { | |
lengths[val] = @intCast(u16, try self.readBits(3)); | |
} | |
lencode.construct(lengths[0..]) catch return error.InvalidTree; | |
} | |
var lengths = std.mem.zeroes([MAXCODES]u16); | |
var i: usize = 0; | |
while (i < nlen + ndist) { | |
const symbol = try self.decode(&lencode); | |
switch (symbol) { | |
0...15 => { | |
lengths[i] = symbol; | |
i += 1; | |
}, | |
16 => { | |
if (i == 0) return error.NoLastLength; | |
const last_length = lengths[i - 1]; | |
const repeat = 3 + (try self.readBits(2)); | |
const last_index = i + repeat; | |
if (last_index > lengths.len) | |
return error.InvalidLength; | |
while (i < last_index) : (i += 1) { | |
lengths[i] = last_length; | |
} | |
}, | |
17 => { | |
i += 3 + (try self.readBits(3)); | |
}, | |
18 => { | |
i += 11 + (try self.readBits(7)); | |
}, | |
else => return error.InvalidSymbol, | |
} | |
} | |
if (i > nlen + ndist) | |
return error.InvalidLength; | |
if (lengths[256] == 0) | |
return error.MissingEOBCode; | |
self.huffman_tables[0].construct(lengths[0..nlen]) catch |err| switch (err) { | |
error.Oversubscribed => return error.InvalidTree, | |
error.IncompleteSet => { | |
if (nlen != self.huffman_tables[0].count[0] + self.huffman_tables[0].count[1]) { | |
return error.InvalidTree; | |
} | |
}, | |
}; | |
self.huffman_tables[1].construct(lengths[nlen .. nlen + ndist]) catch |err| switch (err) { | |
error.Oversubscribed => return error.InvalidTree, | |
error.IncompleteSet => { | |
if (ndist != self.huffman_tables[1].count[0] + self.huffman_tables[1].count[1]) { | |
return error.InvalidTree; | |
} | |
}, | |
}; | |
self.hlen = &self.huffman_tables[0]; | |
self.hdist = &self.huffman_tables[1]; | |
self.state = .DecodeBlockData; | |
} | |
fn codes(self: *Self, lencode: *Huffman, distcode: *Huffman) !bool { | |
const LENS = [29]u16{ | |
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, | |
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, | |
}; | |
const LEXT = [29]u16{ | |
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, | |
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, | |
}; | |
const DISTS = [30]u16{ | |
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, | |
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, | |
}; | |
const DEXT = [30]u16{ | |
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, | |
7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, | |
}; | |
while (true) { | |
const symbol = try self.decode(lencode); | |
switch (symbol) { | |
0...255 => { | |
const c = @truncate(u8, symbol); | |
if (self.window.append(c) == 0) { | |
self.state = .{ .CopyLit = c }; | |
return false; | |
} | |
}, | |
256 => { | |
return true; | |
}, | |
257...285 => { | |
const length_symbol = symbol - 257; | |
const length = LENS[length_symbol] + | |
@intCast(u16, try self.readBits(LEXT[length_symbol])); | |
const distance_symbol = try self.decode(distcode); | |
const distance = DISTS[distance_symbol] + | |
@intCast(u16, try self.readBits(DEXT[distance_symbol])); | |
if (distance > self.window.buf.len or distance > self.window.total_written) | |
return error.InvalidDistance; | |
const written = self.window.copyFrom(distance, length); | |
if (written != length) { | |
self.state = .{ | |
.CopyFrom = .{ | |
.distance = distance, | |
.length = length - @truncate(u16, written), | |
}, | |
}; | |
return false; | |
} | |
}, | |
else => return error.InvalidFixedCode, | |
} | |
} | |
} | |
fn decode(self: *Self, h: *Huffman) !u16 { | |
var prefix: u32 = 0; | |
var code_len = h.min_code_len; | |
if (code_len == 0) | |
return error.OutOfCodes; | |
while (true) { | |
_ = try self.peekBits(code_len); | |
prefix = self.bits & ((1 << PREFIX_LUT_BITS) - 1); | |
const lut_entry = &h.prefix_lut[prefix]; | |
if (lut_entry.len == 0) | |
break; | |
if (lut_entry.len <= code_len) { | |
self.discardBits(code_len); | |
return lut_entry.symbol; | |
} | |
code_len = lut_entry.len; | |
} | |
prefix = try self.readBits(PREFIX_LUT_BITS); | |
var len: usize = PREFIX_LUT_BITS + 1; | |
var first: usize = h.last_code; | |
var index: usize = h.last_index; | |
var code = bitReverse(u32, prefix, PREFIX_LUT_BITS + 1); | |
while (len <= MAXBITS) : (len += 1) { | |
code |= try self.readBits(1); | |
const count = h.count[len]; | |
if (code < first + count) { | |
return h.symbol[index + (code - first)]; | |
} | |
index += count; | |
first += count; | |
first <<= 1; | |
code <<= 1; | |
} | |
return error.OutOfCodes; | |
} | |
fn step(self: *Self) !void { | |
while (true) { | |
switch (self.state) { | |
.DecodeBlockHeader => { | |
if (self.seen_eos) return; | |
const last = @intCast(u1, try self.readBits(1)); | |
const kind = @intCast(u2, try self.readBits(2)); | |
self.seen_eos = last != 0; | |
switch (kind) { | |
0 => try self.stored(), | |
1 => try self.fixed(), | |
2 => try self.dynamic(), | |
3 => return error.InvalidBlockType, | |
} | |
}, | |
.DecodeBlockData => { | |
if (!try self.codes(self.hlen, self.hdist)) { | |
return; | |
} | |
self.state = .DecodeBlockHeader; | |
}, | |
.Copy => |*length| { | |
const N = math.min(self.window.writable(), length.*); | |
var i: usize = 0; | |
while (i < N) : (i += 1) { | |
var tmp: [1]u8 = undefined; | |
if ((try self.inner_reader.read(&tmp)) != 1) { | |
return error.EndOfStream; | |
} | |
self.window.appendUnsafe(tmp[0]); | |
} | |
if (N != length.*) { | |
length.* -= N; | |
return; | |
} | |
self.state = .DecodeBlockHeader; | |
}, | |
.CopyLit => |c| { | |
if (self.window.append(c) == 0) { | |
return; | |
} | |
self.state = .DecodeBlockData; | |
}, | |
.CopyFrom => |*info| { | |
const written = self.window.copyFrom(info.distance, info.length); | |
if (written != info.length) { | |
info.length -= @truncate(u16, written); | |
return; | |
} | |
self.state = .DecodeBlockData; | |
}, | |
} | |
} | |
} | |
fn init(source: ReaderType, window_slice: []u8) Self { | |
assert(math.isPowerOfTwo(window_slice.len)); | |
return Self{ | |
.inner_reader = source, | |
.window = .{ .buf = window_slice }, | |
.seen_eos = false, | |
.state = .DecodeBlockHeader, | |
.hdist = undefined, | |
.hlen = undefined, | |
.bits = 0, | |
.bits_left = 0, | |
}; | |
} | |
pub fn read(self: *Self, buffer: []u8) Error!usize { | |
if (buffer.len == 0) | |
return 0; | |
var read_amt: usize = self.window.read(buffer); | |
while (read_amt < buffer.len) { | |
try self.step(); | |
if (self.window.readable() == 0) | |
break; | |
read_amt += self.window.read(buffer[read_amt..]); | |
} | |
return read_amt; | |
} | |
pub fn reader(self: *Self) Reader { | |
return .{ .context = self }; | |
} | |
}; | |
} | |
pub fn inflateStream(reader: anytype, window_slice: []u8) InflateStream(@TypeOf(reader)) { | |
return InflateStream(@TypeOf(reader)).init(reader, window_slice); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment