const std = @import("std");
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;
// PNG decoder based on stb_image.h implementation, including zlib decompressor. A bit more Zig-friendly than the original. Compared to
// stb_image.h:
// - only decodes from a slice (filename or callbacks not supported)
// - only decodes to 8-bit channels (any other bit depth is converted, no decoding to 16-bit or float samples)
// - requires an allocator (zig style)
// - doesn't support interlaced images
// - doesn't support constant transparent color (for RGB and grayscale, there can be a single color specified, which should be interpreted
// as transparent - this is ignored in this implementation; alpha channels in RGBA, grayscale with alpha and indexed colors should work)
// - doesn't support iPhone PNG at all
// - no vertical flip option
// - no stbi_info equivalent (get width, height and channel count without fully decoding)
// - the output channel count must be chosen explicitly, but there are more choices: you can get a one, two, three or four channel image
// (from any source color format), and for single channel output you can choose which input channel is copied
//
// Other stuff:
// - performance: idk
// - correctness: works for me™
// - code quality: I changed int sizes randomly until it compiled, but at least there are comments
// - I learned quite a lot while making this
// - special thanks to Sean Barrett and other contributors of stb_image.h
//
// License:
//
// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// For more information, please refer to <http://unlicense.org/>
pub const SourceChannels = enum {
r,
g,
b,
a,
rg,
rgb,
rgba,
};
fn channelCount(channels: SourceChannels) u3 {
return switch (channels) {
.r, .g, .b, .a => 1,
.rg => 2,
.rgb => 3,
.rgba => 4,
};
}
const Error = error{OutOfMemory, InvalidPng, NotImplemented};
const IHDR = struct {
width: u32,
height: u32,
bit_depth: u8,
color_type: ColorType,
compression_method: u8,
filter_method: u8,
interlace_method: Interlace,
};
const Color = struct {
r: u8,
g: u8,
b: u8,
a: u8,
};
const ColorType = enum(u8) {
grayscale = 0,
rgb = 2,
indexed = 3,
grayscale_alpha = 4,
rgba = 6,
_,
};
const Interlace = enum(u8) {
none = 0,
adam7 = 1,
_,
};
const Filter = enum(u8) {
none = 0,
sub = 1,
up = 2,
average = 3,
paeth = 4,
// Additional synthetic filters used to avoid needing a dummy row of zeros serving as the previous scanline. These are not valid filter types for a PNG.
average_first_row = 254,
paeth_first_row = 255,
_,
};
/// Every PNG file starts with the following sequence of bytes.
const png_signature = [_]u8{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A};
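/// Packs a 4-character chunk type name into a big-endian u32 so it can be matched against chunk types read from the file,
/// e.g. chunk("IHDR") == 0x49484452.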
fn chunk(comptime name: *const [4]u8) u32 {
return std.mem.readIntBig(u32, name);
}
/// Advances the given slice by the given number of bytes, effectively moving the beginning of the slice but leaving the end unchanged.
/// Returns an error if the slice is shorter than the given amount.
fn advance(slicePtr: *[]const u8, amount: usize) !void {
const slice = slicePtr.*;
if (slice.len < amount) {
return error.OutOfRange;
}
slicePtr.* = slice[amount .. slice.len];
}
/// Advances the given slice by the given number of bytes, effectively moving the beginning of the slice but leaving the end unchanged.
/// Asserts that the slice is at least as long as the given amount.
fn advanceUnchecked(slicePtr: *[]const u8, amount: usize) void {
const slice = slicePtr.*;
slicePtr.* = slice[amount .. slice.len];
}
/// Reads an integer from the beginning of a given slice and advances it past the integer. Integer is assumed to be in big-endian order.
/// Returns error if the slice is too short to contain the integer. The bit count of the integer must be divisible by 8.
fn readIntBigAdvance(comptime T: type, slicePtr: *[]const u8) !T {
const int_byte_len = @divExact(@typeInfo(T).Int.bits, 8);
if (slicePtr.len < int_byte_len) {
return error.OutOfRange;
}
const ret = std.mem.readIntBig(T, slicePtr.*[0..int_byte_len]);
advanceUnchecked(slicePtr, int_byte_len);
return ret;
}
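// Example usage (a sketch, not part of the original decoder; assumes a PNG embedded with @embedFile and an allocator obtained elsewhere):
//
// var width: u32 = undefined;
// var height: u32 = undefined;
// const pixels = try loadPng(allocator, @embedFile("image.png"), &width, &height, .rgba);
// defer allocator.free(pixels);
// // `pixels` now holds height rows of width tightly packed RGBA pixels (width * height * 4 bytes).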
pub fn loadPng(allocator: *Allocator, data: []const u8, widthPtr: *u32, heightPtr: *u32, channels: SourceChannels) Error![]u8 {
// Slice that starts as the full data and advances as the image is read.
var remaining_data = data;
// Check if data starts with PNG signature.
if (!std.mem.startsWith(u8, data, &png_signature)) {
return error.InvalidPng;
}
// Advance past the signature, which should be followed immediately by a series of chunks.
advance(&remaining_data, png_signature.len) catch return error.InvalidPng;
var at_first_chunk = true;
var ihdr: IHDR = undefined;
// Palette length of zero is used to indicate that no palette was defined (because a palette with zero entries is not allowed). The
// palette can contain from 1 to 256 entries.
var palette_length: u9 = 0;
var palette: [256]Color = undefined;
// For color types without an alpha channel (grayscale and RGB), a single constant color can optionally be provided, meaning that pixels
// of exactly that color should be treated as fully transparent. The flag has_transparent_color is set to true if such a color was
// provided, in which case it is stored in the transparent_color array. The array has 16-bit RGB components to accommodate the highest
// possible bit depth; however, when a lower bit depth is used, the values in this array use that same, lower bit depth. For the grayscale
// color type, there is a single grayscale value used as the transparent color, which is stored at every index of the array.
var has_transparent_color = false;
var transparent_color = [_]u16{0, 0, 0};
var compressed_data = ArrayList(u8).init(allocator);
defer compressed_data.deinit();
// Iterate over chunks until they end. We end the iteration after the last chunk (which should be the IEND chunk) or when we run out of
// data, in which case it is a format error.
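// Each chunk has the same outer layout: [4B] length: u32be, [4B] chunk_type: u32be, [length] chunk data, [4B] crc32: u32be.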
while (true) {
const chunk_length = readIntBigAdvance(u32, &remaining_data) catch return error.InvalidPng;
const chunk_type = readIntBigAdvance(u32, &remaining_data) catch return error.InvalidPng;
switch (chunk_type) {
// IHDR chunk structure:
// [4B] width: u32be
// [4B] height: u32be
// [1B] bit_depth: u8
// [1B] color_type: u8
// [1B] compression_method: u8
// [1B] filter_method: u8
// [1B] interlace_method: u8
chunk("IHDR") => {
// IHDR is required to be the first chunk.
if (!at_first_chunk) {
return error.InvalidPng;
}
at_first_chunk = false;
// IHDR has constant length of 13.
if (chunk_length != 13) {
return error.InvalidPng;
}
ihdr.width = readIntBigAdvance(u32, &remaining_data) catch return error.InvalidPng;
ihdr.height = readIntBigAdvance(u32, &remaining_data) catch return error.InvalidPng;
ihdr.bit_depth = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
ihdr.color_type = @intToEnum(ColorType, readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng);
ihdr.compression_method = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
ihdr.filter_method = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
ihdr.interlace_method = @intToEnum(Interlace, readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng);
// Zero is an invalid value for width and height.
if (ihdr.width == 0 or ihdr.height == 0) {
return error.InvalidPng;
}
// Bit depth is the number of bits per sample or per palette index. Valid values are 1, 2, 4, 8 and 16, although not all
// values are allowed for all color types. This is why we verify color type and bit depth at the same time.
switch (ihdr.color_type) {
// For grayscale, every pixel is a single grayscale sample. All valid bit depths are allowed.
.grayscale => if (ihdr.bit_depth != 1
and ihdr.bit_depth != 2
and ihdr.bit_depth != 4
and ihdr.bit_depth != 8
and ihdr.bit_depth != 16) return error.InvalidPng,
// For RGB, every pixel has three samples. Only bit depths 8 and 16 are allowed.
.rgb => if (ihdr.bit_depth != 8
and ihdr.bit_depth != 16) return error.InvalidPng,
// For indexed, every pixel is an index into palette. Indexed color type requires a PLTE chunk to appear. Only bit
// depths 1, 2, 4 and 8 are allowed.
.indexed => if (ihdr.bit_depth != 1
and ihdr.bit_depth != 2
and ihdr.bit_depth != 4
and ihdr.bit_depth != 8) return error.InvalidPng,
// For grayscale with alpha, every pixel has two samples. Only bit depths 8 and 16 are allowed.
.grayscale_alpha => if (ihdr.bit_depth != 8
and ihdr.bit_depth != 16) return error.InvalidPng,
// For RGBA, every pixel has four samples. Only bit depths 8 and 16 are allowed.
.rgba => if (ihdr.bit_depth != 8
and ihdr.bit_depth != 16) return error.InvalidPng,
// Every other value for color type is invalid.
_ => return error.InvalidPng,
}
// Only compression method zero is allowed. The compression method zero means deflate/inflate compression with a sliding
// window of at most 32768 bytes.
if (ihdr.compression_method != 0) {
return error.InvalidPng;
}
// Only filter method zero is allowed. Filter method zero uses adaptive filtering with five basic filter types.
if (ihdr.filter_method != 0) {
return error.InvalidPng;
}
// All valid interlace methods are specified in Interlace enum (there are two). We check if we got an unspecified interlace
// method.
if (@enumToInt(ihdr.interlace_method) >= 2) {
return error.InvalidPng;
}
},
// PLTE chunk structure:
// [3B * N] color: [N]{
// [1B] red: u8
// [1B] green: u8
// [1B] blue: u8
// }
// Where N is the chunk length divided by 3 with no remainder allowed.
chunk("PLTE") => {
// IHDR is required to be the first chunk.
if (at_first_chunk) {
return error.InvalidPng;
}
// There can be only one PLTE chunk.
if (palette_length != 0) {
return error.InvalidPng;
}
// The number of palette entries is determined by the chunk length. A chunk length not divisible by 3 is an error.
palette_length = std.math.cast(u9, std.math.divExact(u32, chunk_length, 3) catch return error.InvalidPng) catch return error.InvalidPng;
// The number of palette entries should be at least 1 and no higher than can be represented using specified bit depth.
if (palette_length < 1 or palette_length > @as(u9, 1) << @intCast(u4, ihdr.bit_depth)) {
return error.InvalidPng;
}
// Note that we can have fewer entries than the bit depth would allow. In that case any out of range index is considered an
// error.
// PLTE chunk should not appear for grayscale and grayscale with alpha color types.
if (ihdr.color_type == .grayscale or ihdr.color_type == .grayscale_alpha) {
return error.InvalidPng;
}
// Note that the PLTE chunk is allowed for RGB and RGBA color types. Its purpose is to suggest a set of colors to which the
// image should be quantized, if it cannot be displayed with the full range of colors. That is not our concern, however we
// should remember not to return an error.
var i: usize = 0;
while (i < palette_length) : (i += 1) {
// Note that palette format color is always RGB with 8 bits per component.
palette[i].r = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
palette[i].g = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
palette[i].b = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
// Although PLTE doesn't contain alpha information, we store an alpha value of 255 to provide a default. The reason the palette has
// an alpha component at all is that alpha values can be provided separately, in the tRNS chunk.
palette[i].a = 255;
}
},
// tRNS chunk structure depends on color type:
// - For grayscale:
// [2B] gray: u16
// - For RGB:
// [2B] red: u16
// [2B] green: u16
// [2B] blue: u16
// - For indexed:
// [N * 1B] alpha: [N]u8
// Where N is the chunk length.
chunk("tRNS") => {
// IHDR is required to be the first chunk.
if (at_first_chunk) {
return error.InvalidPng;
}
// tRNS should appear before IDAT.
if (compressed_data.items.len > 0) {
return error.InvalidPng;
}
switch (ihdr.color_type) {
// For grayscale color type, tRNS chunk provides a single grayscale value which should be treated as transparent.
.grayscale => {
has_transparent_color = true;
// For grayscale, tRNS chunk should have exactly 2 bytes
if (chunk_length != 2) {
return error.InvalidPng;
}
// Note that while the value takes up 2 bytes, its bit depth matches the bit depth of the image.
const gray_transparent = readIntBigAdvance(u16, &remaining_data) catch return error.InvalidPng;
transparent_color[0] = gray_transparent;
transparent_color[1] = gray_transparent;
transparent_color[2] = gray_transparent;
},
// For RGB color type, tRNS chunk provides a single RGB color which should be treated as transparent.
.rgb => {
has_transparent_color = true;
// For RGB, tRNS chunk should have exactly 6 bytes
if (chunk_length != 6) {
return error.InvalidPng;
}
// Note that while the values take up 2 bytes each, their bit depth matches the bit depth of the image.
transparent_color[0] = readIntBigAdvance(u16, &remaining_data) catch return error.InvalidPng;
transparent_color[1] = readIntBigAdvance(u16, &remaining_data) catch return error.InvalidPng;
transparent_color[2] = readIntBigAdvance(u16, &remaining_data) catch return error.InvalidPng;
},
// For indexed color type, tRNS chunk provides alpha values for palette entries.
.indexed => {
// tRNS chunk should appear after PLTE.
if (palette_length == 0) {
return error.InvalidPng;
}
// The chunk length is also the number of alpha entries provided. There can be no more alpha entries than palette
// entries. However, there can be fewer, in which case the alpha value for the remaining entries should be 255. We
// already ensured that by setting the alpha value while reading the PLTE chunk.
if (chunk_length > palette_length) {
return error.InvalidPng;
}
var i: usize = 0;
while (i < chunk_length) : (i += 1) {
palette[i].a = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
}
},
// tRNS chunk should not appear for grayscale with alpha and RGBA color types.
.grayscale_alpha, .rgba => return error.InvalidPng,
// Invalid color type should be rejected during IHDR chunk parsing.
_ => unreachable,
}
},
// The IDAT chunk contains the compressed image data. There can be many consecutive chunks, in which case the compressed
// datastream is a concatenation of the contents of all the IDAT chunks. Here we only copy the compressed data to decompress and
// decode it at the end.
chunk("IDAT") => {
// IHDR is required to be the first chunk.
if (at_first_chunk) {
return error.InvalidPng;
}
// The palette has to be specified before IDAT, and for the indexed color type it is mandatory.
if (ihdr.color_type == .indexed and palette_length == 0) {
return error.InvalidPng;
}
// IDAT chunks are appended to compressed_data to be later decompressed.
if (remaining_data.len < chunk_length) {
return error.InvalidPng;
}
try compressed_data.appendSlice(remaining_data[0..chunk_length]);
advanceUnchecked(&remaining_data, chunk_length);
},
chunk("IEND") => {
// IHDR is required to be the first chunk.
if (at_first_chunk) {
return error.InvalidPng;
}
// IEND chunk is empty.
if (chunk_length != 0) {
return error.InvalidPng;
}
// There should be at least one IDAT before.
if (compressed_data.items.len == 0) {
return error.InvalidPng;
}
// Make an initial guess for the size of the decompressed data to avoid reallocations.
const samples_per_pixel: u3 = switch (ihdr.color_type) {
.grayscale => 1,
.rgb => 3,
.indexed => 1,
.grayscale_alpha => 2,
.rgba => 4,
_ => unreachable,
};
// Distance between scanlines. One scanline contains 1 byte for filtering type followed by tightly packed samples.
const stride = 1 + (ihdr.width * ihdr.bit_depth * samples_per_pixel + 7) / 8;
const bytes_total = ihdr.height * stride;
var decompressed_data = try ArrayList(u8).initCapacity(allocator, bytes_total);
defer decompressed_data.deinit();
decompressData(compressed_data.items, &decompressed_data) catch |err| switch (err) {
error.InvalidZlib => return error.InvalidPng,
error.OutOfMemory => return error.OutOfMemory,
};
// Not enough data.
if (decompressed_data.items.len < bytes_total) {
return error.InvalidPng;
}
try defilter(&ihdr, decompressed_data.items);
// Convert decompressed data to desired output format.
const out_stride = ihdr.width * channelCount(channels);
const out = try allocator.alloc(u8, ihdr.height * out_stride);
errdefer allocator.free(out);
var y: usize = 0;
while (y < ihdr.height) : (y += 1) {
const scanline = decompressed_data.items[stride * y + 1 .. stride * (y + 1)];
const out_scanline = out[out_stride * y .. out_stride * (y + 1)];
var out_pixel = Color{
.r = undefined,
.g = undefined,
.b = undefined,
.a = 255,
};
// TODO use tRNS information (has_transparent_color and transparent_color), remember to compare transparent_color using
// source bit depth.
var x: usize = 0;
while (x < ihdr.width) : (x += 1) {
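// Sub-byte grayscale samples are expanded to 8 bits by bit replication (multiply by 0xFF, 0x55 or 0x11 for 1-, 2- and 4-bit samples),
// 16-bit samples keep only their high byte, and sub-byte indexed values are used directly as palette indices.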
switch (ihdr.color_type) {
.grayscale => switch (ihdr.bit_depth) {
1 => {
const shift = ~@truncate(u3, x);
const c = @as(u8, @truncate(u1, scanline[x / 8] >> shift)) * 0xFF;
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
},
2 => {
const shift = @as(u3, ~@truncate(u2, x)) << 1;
const c = @as(u8, @truncate(u2, scanline[x / 4] >> shift)) * 0x55;
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
},
4 => {
const shift = @as(u3, ~@truncate(u1, x)) << 2;
const c = @as(u8, @truncate(u4, scanline[x / 2] >> shift)) * 0x11;
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
},
8 => {
const c = scanline[x];
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
},
16 => {
const c = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[2 * x..]) >> 8);
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
},
else => unreachable,
},
.rgb => switch (ihdr.bit_depth) {
8 => {
out_pixel.r = scanline[3 * x];
out_pixel.g = scanline[3 * x + 1];
out_pixel.b = scanline[3 * x + 2];
},
16 => {
out_pixel.r = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[6 * x..]) >> 8);
out_pixel.g = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[6 * x + 2..]) >> 8);
out_pixel.b = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[6 * x + 4..]) >> 8);
},
else => unreachable,
},
.indexed => switch (ihdr.bit_depth) {
1 => {
const shift = ~@truncate(u3, x);
const ind = @as(u8, @truncate(u1, scanline[x / 8] >> shift));
if (ind >= palette_length) return error.InvalidPng;
out_pixel = palette[ind];
},
2 => {
const shift = @as(u3, ~@truncate(u2, x)) << 1;
const ind = @as(u8, @truncate(u2, scanline[x / 4] >> shift));
if (ind >= palette_length) return error.InvalidPng;
out_pixel = palette[ind];
},
4 => {
const shift = @as(u3, ~@truncate(u1, x)) << 2;
const ind = @as(u8, @truncate(u4, scanline[x / 2] >> shift));
if (ind >= palette_length) return error.InvalidPng;
out_pixel = palette[ind];
},
8 => {
const ind = scanline[x];
if (ind >= palette_length) return error.InvalidPng;
out_pixel = palette[ind];
},
else => unreachable,
},
.grayscale_alpha => switch (ihdr.bit_depth) {
8 => {
const c = scanline[2 * x];
const a = scanline[2 * x + 1];
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
out_pixel.a = a;
},
16 => {
const c = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[4 * x..]) >> 8);
const a = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[4 * x + 2..]) >> 8);
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
out_pixel.a = a;
},
else => unreachable,
},
.rgba => switch (ihdr.bit_depth) {
8 => {
out_pixel.r = scanline[4 * x];
out_pixel.g = scanline[4 * x + 1];
out_pixel.b = scanline[4 * x + 2];
out_pixel.a = scanline[4 * x + 3];
},
16 => {
out_pixel.r = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[8 * x..]) >> 8);
out_pixel.g = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[8 * x + 2..]) >> 8);
out_pixel.b = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[8 * x + 4..]) >> 8);
out_pixel.a = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[8 * x + 6..]) >> 8);
},
else => unreachable,
},
_ => unreachable,
}
switch (channels) {
.r => out_scanline[x] = out_pixel.r,
.g => out_scanline[x] = out_pixel.g,
.b => out_scanline[x] = out_pixel.b,
.a => out_scanline[x] = out_pixel.a,
.rg => {
out_scanline[2 * x] = out_pixel.r;
out_scanline[2 * x + 1] = out_pixel.g;
},
.rgb => {
out_scanline[3 * x] = out_pixel.r;
out_scanline[3 * x + 1] = out_pixel.g;
out_scanline[3 * x + 2] = out_pixel.b;
},
.rgba => {
out_scanline[4 * x] = out_pixel.r;
out_scanline[4 * x + 1] = out_pixel.g;
out_scanline[4 * x + 2] = out_pixel.b;
out_scanline[4 * x + 3] = out_pixel.a;
}
}
}
}
widthPtr.* = ihdr.width;
heightPtr.* = ihdr.height;
return out;
},
// Unrecognized chunk. We can skip it unless it's a critical chunk.
else => {
// IHDR is required to be the first chunk.
if (at_first_chunk) {
return error.InvalidPng;
}
// Bit 5 of the first byte of the chunk type (bit 29 of the big-endian u32) is the ancillary bit: if it is clear, the chunk is
// critical. We should reject any image with a critical chunk that we don't recognize.
if (chunk_type & (1 << 29) == 0) {
return error.NotImplemented;
}
// Skip over the data.
advance(&remaining_data, chunk_length) catch return error.InvalidPng;
}
}
// Every PNG chunk ends with a CRC-32 checksum, which is not included in the chunk length. We skip it without verifying.
advance(&remaining_data, 4) catch return error.InvalidPng;
}
}
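/// Reverses the per-scanline filtering of PNG filter method 0, in place. Each reconstructed byte is Recon(x) = Filt(x) + predictor
/// (mod 256, hence the +% operators), where the predictor is 0 (none), the corresponding byte of the pixel to the left (sub), the byte
/// above (up), the floored average of left and above (average), or the Paeth predictor of left, above and upper-left (paeth).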
fn defilter(ihdr: *IHDR, decompressed_data: []u8) !void {
if (ihdr.interlace_method != .none) {
return error.NotImplemented; // TODO
}
const samples_per_pixel: u3 = switch (ihdr.color_type) {
.grayscale => 1,
.rgb => 3,
.indexed => 1,
.grayscale_alpha => 2,
.rgba => 4,
_ => unreachable,
};
const bytes_per_scanline = (ihdr.width * ihdr.bit_depth * samples_per_pixel + 7) / 8;
const stride = 1 + bytes_per_scanline;
if (decompressed_data.len < ihdr.height * stride) {
return error.InvalidPng;
}
const input_channels: u3 = switch (ihdr.color_type) {
.grayscale => 1,
.rgb => 3,
.indexed => 1,
.grayscale_alpha => 2,
.rgba => 4,
_ => unreachable,
};
// Number of bytes we look back to get the "corresponding" byte of the pixel to the left.
const pixel_stride = input_channels * ((ihdr.bit_depth + 7) / 8);
var y: usize = 0;
while (y < ihdr.height) : (y += 1) {
const filter = @intToEnum(Filter, decompressed_data[stride * y]);
if (@enumToInt(filter) >= 5) {
return error.InvalidPng;
}
const scanline = decompressed_data[stride * y + 1 .. stride * (y + 1)];
const last_scanline = if (y == 0) decompressed_data[0..0] else decompressed_data[stride * (y - 1) + 1 .. stride * y];
const real_filter = if (y == 0) switch (filter) {
.up => .none,
.average => .average_first_row,
.paeth => .paeth_first_row,
else => filter,
} else filter;
// Handle first pixel.
var i: usize = 0;
switch (real_filter) {
.none => i = pixel_stride,
.sub => i = pixel_stride,
.up => while (i < pixel_stride) : (i += 1) {
scanline[i] = last_scanline[i];
},
.average => while (i < pixel_stride) : (i += 1) {
scanline[i] +%= last_scanline[i] >> 1;
},
.paeth => while (i < pixel_stride) : (i += 1) {
scanline[i] +%= paeth(0, last_scanline[i], 0);
},
.average_first_row => i = pixel_stride,
.paeth_first_row => i = pixel_stride,
_ => unreachable,
}
// Handle rest of the scanline.
switch (real_filter) {
.none => {},
.sub => while (i < bytes_per_scanline) : (i += 1) {
scanline[i] +%= scanline[i - pixel_stride];
},
.up => while (i < bytes_per_scanline) : (i += 1) {
scanline[i] +%= last_scanline[i];
},
.average => while (i < bytes_per_scanline) : (i += 1) {
// Widen to u9 before adding so the sum of two bytes cannot overflow.
scanline[i] +%= @intCast(u8, (@as(u9, scanline[i - pixel_stride]) + last_scanline[i]) >> 1);
},
.paeth => while (i < bytes_per_scanline) : (i += 1) {
scanline[i] +%= paeth(scanline[i - pixel_stride], last_scanline[i], last_scanline[i - pixel_stride]);
},
.average_first_row => while (i < bytes_per_scanline) : (i += 1) {
scanline[i] +%= scanline[i - pixel_stride] >> 1;
},
.paeth_first_row => while (i < bytes_per_scanline) : (i += 1) {
scanline[i] +%= paeth(scanline[i - pixel_stride], 0, 0);
},
_ => unreachable,
}
}
}
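/// Paeth predictor from the PNG spec: picks whichever of a (left), b (above) and c (upper-left) is closest to p = a + b - c, preferring
/// a, then b, then c on ties. Inputs are widened to i16 because p can leave the u8 range.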
fn paeth(a: i16, b: i16, c: i16) u8 {
const p = a + b - c;
const pa = std.math.absInt(p - a) catch unreachable;
const pb = std.math.absInt(p - b) catch unreachable;
const pc = std.math.absInt(p - c) catch unreachable;
if (pa <= pb and pa <= pc) return @intCast(u8, a);
if (pb <= pc) return @intCast(u8, b);
return @intCast(u8, c);
}
// --- ZLIB PORT ---------------------------------------------------------------
// Refer to RFC 1950 and 1951 if you want to have any idea what is happening here.
// TODO This entire section is overflow hell. Where can we trust the values that we have? Where could an overflow occur, and when are we
// sure it won't? Which int casts are safe?
const Zlib = struct {
data: []const u8,
remaining_data: []const u8,
// buffer for unpacking input data bit by bit
remaining_bits: u32 = 0,
remaining_bits_count: u6 = 0, // [0, 32]
out: *ArrayList(u8),
/// Discards bits to be byte-aligned.
pub fn alignToByte(self: *Zlib) void {
const to_discard = @truncate(u3, self.remaining_bits_count);
self.remaining_bits >>= to_discard;
self.remaining_bits_count -= to_discard;
}
/// Refills the internal bit buffer. Never fails; instead it won't refill fully when the data ends.
pub fn refillBits(self: *Zlib) void {
while (self.remaining_bits_count < 24 and self.remaining_data.len > 0) {
const byte = self.remaining_data[0];
advanceUnchecked(&self.remaining_data, 1);
self.remaining_bits |= @as(u32, byte) << @intCast(u5, self.remaining_bits_count);
self.remaining_bits_count += 8;
}
}
/// Get a runtime-known number of bits (up to 16).
pub fn getBits(self: *Zlib, bit_count: u5) !u16 {
std.debug.assert(bit_count <= 16);
if (self.remaining_bits_count < bit_count) {
self.refillBits();
// If we still have fewer bits than requested, it means we ran out of data.
if (self.remaining_bits_count < bit_count) {
return error.InvalidZlib;
}
}
const ret = @intCast(u16, self.remaining_bits & ((@as(u32, 1) << bit_count) - 1));
self.remaining_bits >>= bit_count;
self.remaining_bits_count -= bit_count;
return ret;
}
/// Get a comptime-known number of bits (up to 16).
pub fn getBitsCt(self: *Zlib, comptime T: type) !T {
const bit_count = @typeInfo(T).Int.bits;
// TODO We could detect wrong signedness and bit count at compile time to produce a compiler error.
return @intCast(T, try self.getBits(bit_count));
}
};
// symbol - decoded value (e.g. literal byte, length, distance), up to 9 bits
// code - code for value, variable bit length
// bit count - bit length of a code
// ordinal - index of a symbol if you stable sort the symbols by their bit count
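// Example (RFC 1951, section 3.2.2): symbols A-H with bit counts (3, 3, 3, 3, 3, 2, 4, 4) get the canonical codes
// A=010, B=011, C=100, D=101, E=110, F=00, G=1110, H=1111 - codes of the same length are consecutive and ordered by symbol.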
const Huffman = struct {
/// Use the next 9 bits as an index into this array. If the value is nonzero, there is a fast path to decode the next code: the symbol
/// value is in bits 0-8 and the higher bits contain the bit count. A code with a bit count higher than 9 won't be found in this table
/// and requires the slow path.
fast_lut: [1 << fast_bits]u16,
/// First code for a given bit count.
first_code: [16]u17,
/// Biggest code value for a given bit count.
max_code: [16]u17,
/// First ordinal for a given bit count.
first_symbol: [16]u17,
/// Bit count for a code that encodes given ordinal.
bit_count: [symbol_count]u8,
/// Symbol that corresponds to a given ordinal.
symbol: [symbol_count]u16,
const fast_bits = 9;
const fast_mask = (1 << fast_bits) - 1;
const symbol_count = 288;
/// Predefined bit counts for codes that encode symbols in joined value-length alphabet.
const predefined_val_len_bit_count = [_]u8 {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8,
};
/// Predefined bit counts for codes that encode symbols in distance alphabet.
const predefined_dist_bit_count = [_]u8 {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
/// Precomputed Huffman structure for predefined joined value-length alphabet.
const predefined_val_len_huffman: Huffman = blk: {
@setEvalBranchQuota(10000);
var huffman: Huffman = undefined;
huffman.build(&predefined_val_len_bit_count) catch unreachable;
break :blk huffman;
};
/// Precomputed Huffman structure for predefined distance alphabet.
const predefined_dist_huffman: Huffman = blk: {
@setEvalBranchQuota(10000);
var huffman: Huffman = undefined;
huffman.build(&predefined_dist_bit_count) catch unreachable;
break :blk huffman;
};
// RFC 1951 (page 8)
fn build(self: *Huffman, bit_count: []const u8) !void {
// Clear the fast LUT.
std.mem.set(u16, &self.fast_lut, 0);
// Count the number of codes for each bit length.
var code_count = [_]u16{0} ** 16;
for (bit_count) |bc| {
code_count[bc] += 1; // TODO Can we trust this value to be a small enough index?
}
// Codes that are never used receive bit count of zero. We must not assign them codes, so we pretend they don't exist.
code_count[0] = 0;
// There can be no more codes for a given bit count than the number of bits would allow.
for (code_count) |cc, i| {
if (cc > (@as(u16, 1) << @intCast(u4, i))) {
return error.InvalidZlib;
}
}
// Find the value of the smallest code for each bit count.
var next_code: [16]u17 = undefined;
var code: u17 = 0;
var symbol: u16 = 0;
var bc: u4 = 1;
while (true) {
next_code[bc] = code;
self.first_code[bc] = code;
self.first_symbol[bc] = symbol;
code += code_count[bc];
if (code_count[bc] != 0 and code - 1 >= @as(u17, 1) << bc) {
return error.InvalidZlib;
}
self.max_code[bc] = code << (~bc + 1); // preshift for inner loop
code <<= 1;
symbol += code_count[bc];
if (bc == 15) break;
bc += 1;
}
symbol = 0;
while (symbol < bit_count.len) : (symbol += 1) {
bc = @intCast(u4, bit_count[symbol]);
if (bc == 0) {
continue;
}
const ordinal = next_code[bc] - self.first_code[bc] + self.first_symbol[bc];
const fast_value = (@as(u16, bc) << 9) | symbol;
self.bit_count[ordinal] = bc;
self.symbol[ordinal] = symbol;
if (bc <= fast_bits) {
code = @bitReverse(u9, @intCast(u9, next_code[bc])) >> (9 - bc);
while (code < (1 << fast_bits)) : (code += @as(u16, 1) << bc) {
self.fast_lut[code] = fast_value;
}
}
next_code[bc] += 1;
}
}
};
pub fn decompressData(data: []const u8, out: *ArrayList(u8)) !void {
var zlib = Zlib{
.data = data,
.remaining_data = data,
.out = out,
};
// ZLIB structure:
// [1B] compression_method_and_flags: u8
// [1B] flags: u8
// if (preset_dictionary) [4B] preset_dictionary: u32
// [?] compressed_data
// [4B] adler32: u32
// Where preset_dictionary is a flag on bit 5 of flags.
// Parsing header.
const compression_method_and_flags = try zlib.getBitsCt(u8);
const compression_method = compression_method_and_flags & 0b0000_1111;
// Bits 4-7 in compression_method_and_flags contain compression info, from which we can get LZ77 window size. We don't use that
// information.
const flags = try zlib.getBitsCt(u8);
//const check_bits = flags & 0b0001_1111;
const preset_dictionary = flags & 0b0010_0000 != 0;
// Per spec, check_bits must be chosen so that compression_method_and_flags and flags, viewed as a 16-bit unsigned integer stored in
// MSB order (compression_method_and_flags * 256 + flags), form a multiple of 31.
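// For example, the common zlib header bytes 0x78 0x9C give 0x78 * 256 + 0x9C = 30876 = 31 * 996, so the check passes.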
if ((@as(u16, compression_method_and_flags) * 256 + flags) % 31 != 0) {
return error.InvalidZlib;
}
// When the zlib data format is used as part of another standard format, a compliant decompressor must support all the preset
// dictionaries specified by the other format. The other format (PNG in our case) specifies that a preset dictionary is not
// allowed.
if (preset_dictionary) {
return error.InvalidZlib;
}
// Compression method 8 is deflate, which is required by PNG.
if (compression_method != 8) {
return error.InvalidZlib;
}
// Parsing compressed data.
while (true) {
const final = (try zlib.getBitsCt(u1)) != 0;
const block_type = try zlib.getBitsCt(u2);
switch (block_type) {
// no compression
0 => try parseUncompressedBlock(&zlib),
// compressed with fixed Huffman codes
1 => {
try parseHuffmanBlock(
&zlib,
&Huffman.predefined_val_len_huffman,
&Huffman.predefined_dist_huffman
);
},
// compressed with dynamic Huffman codes
2 => {
var val_len_huffman: Huffman = undefined;
var dist_huffman: Huffman = undefined;
try computeHuffmanCodes(&zlib, &val_len_huffman, &dist_huffman);
try parseHuffmanBlock(&zlib, &val_len_huffman, &dist_huffman);
},
// reserved value (error)
3 => return error.InvalidZlib,
}
if (final) {
break;
}
}
}
fn parseUncompressedBlock(zlib: *Zlib) !void {
zlib.alignToByte();
zlib.refillBits();
if (zlib.remaining_bits_count < 32) {
// Assert that we actually ran out of data, not refilled to a lower amount because calling alignToByte didn't align.
std.debug.assert(zlib.remaining_bits_count & 7 == 0 and zlib.remaining_data.len == 0);
return error.InvalidZlib;
}
// Manually consume next 32 bits to read two u16 numbers.
const len = @byteSwap(u16, @intCast(u16, zlib.remaining_bits & 0xFFFF)); // block content length
const nlen = @byteSwap(u16, @intCast(u16, zlib.remaining_bits >> 16)); // one's complement (bit not) of len
zlib.remaining_bits = 0;
zlib.remaining_bits_count = 0;
if (nlen != ~len) {
return error.InvalidZlib;
}
if (zlib.remaining_data.len < len) {
return error.InvalidZlib;
}
try zlib.out.appendSlice(zlib.remaining_data[0..len]);
advanceUnchecked(&zlib.remaining_data, len);
}
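// The order in which bit counts for the internal (code length) alphabet are transmitted, per RFC 1951 section 3.2.7.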
const internal_symbol_order = [_]u8 {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
// RFC 1951 (page 13-14)
fn computeHuffmanCodes(zlib: *Zlib, val_len_huffman: *Huffman, dist_huffman: *Huffman) !void {
// Arrays of bit counts for joined value-length alphabet and for distance alphabet are themselves encoded using dynamic Huffman
// encoding, which we call here the internal Huffman.
var internal_huffman: Huffman = undefined;
// Number of value-length symbols in use, [257, 286]. Values above 286 are invalid and would overflow the bit_count array below.
const val_len_symbol_count = (try zlib.getBits(5)) + 257;
if (val_len_symbol_count > 286) return error.InvalidZlib;
// Number of distance symbols in use, [1, 32].
const dist_symbol_count = (try zlib.getBits(5)) + 1;
// Number of internal symbols in use, [4, 19].
const internal_symbol_count = (try zlib.getBits(4)) + 4;
// Reading bit counts for the internal Huffman encoding. Thankfully, these bit counts are not Huffman encoded.
var internal_bit_count = [_]u8{0} ** 19;
var i: u16 = 0;
while (i < internal_symbol_count) : (i += 1) {
const bc = try zlib.getBitsCt(u3);
internal_bit_count[internal_symbol_order[i]] = bc;
}
try internal_huffman.build(&internal_bit_count);
// Internal Huffman alphabet:
// 0-15: Literal bit count 0-15
// 16: Repeat the previous value (3 + value of next 2 bits) times
// 17: Repeat bit count 0 (3 + value of next 3 bits) times
// 18: Repeat bit count 0 (11 + value of next 7 bits) times
// Bit count table is shared by both alphabets, because repeats can cross the boundary between them.
var bit_count: [286 + 32]u8 = undefined;
const total_symbol_count = val_len_symbol_count + dist_symbol_count;
i = 0;
while (i < total_symbol_count) {
var symbol = @intCast(u8, try decodeHuffman(zlib, &internal_huffman));
switch (symbol) {
// literal
0 ... 15 => {
bit_count[i] = symbol;
i += 1;
},
// repeat previous
16 => {
if (i == 0) {
return error.InvalidZlib;
}
const len = (try zlib.getBits(2)) + 3;
if (i + len > total_symbol_count) return error.InvalidZlib;
std.mem.set(u8, bit_count[i .. i + len], bit_count[i - 1]);
i += len;
},
// multiple literal 0
17 => {
const len = (try zlib.getBits(3)) + 3;
if (i + len > total_symbol_count) return error.InvalidZlib;
std.mem.set(u8, bit_count[i .. i + len], 0);
i += len;
},
// multiple literal 0
18 => {
const len = (try zlib.getBits(7)) + 11;
if (i + len > total_symbol_count) return error.InvalidZlib;
std.mem.set(u8, bit_count[i .. i + len], 0);
i += len;
},
else => return error.InvalidZlib,
}
}
if (i != total_symbol_count) {
return error.InvalidZlib;
}
try val_len_huffman.build(bit_count[0..val_len_symbol_count]);
try dist_huffman.build(bit_count[val_len_symbol_count .. val_len_symbol_count + dist_symbol_count]);
}
fn parseHuffmanBlock(zlib: *Zlib, val_len_huffman: *const Huffman, dist_huffman: *const Huffman) !void {
while (true) {
const val_len_symbol: u16 = try decodeHuffman(zlib, val_len_huffman);
// byte literal
if (val_len_symbol < 256) {
try zlib.out.append(@intCast(u8, val_len_symbol));
// end-of-block code
} else if (val_len_symbol == 256) {
return;
// copy bytes from before
} else {
// read length
const len_ind = val_len_symbol - 257;
// Value-length symbols 286 and 287 are reserved and don't encode a valid length.
if (len_ind >= 29) return error.InvalidZlib;
const len: u16 = len_base[len_ind] + try zlib.getBits(len_extra_bits[len_ind]);
// read distance
const dist_symbol = try decodeHuffman(zlib, dist_huffman);
// Distance symbols 30 and 31 are reserved and would index past the end of dist_extra_bits.
if (dist_symbol >= 30) return error.InvalidZlib;
const dist = dist_base[dist_symbol] + try zlib.getBits(dist_extra_bits[dist_symbol]);
if (zlib.out.items.len < dist) {
return error.InvalidZlib;
}
var ptr = zlib.out.items.ptr + zlib.out.items.len - dist;
// Run of one byte, common in images. We can reuse the same value instead of moving ptr.
if (dist == 1) {
const value = ptr[0];
try zlib.out.appendNTimes(value, len);
} else {
try zlib.out.ensureUnusedCapacity(len);
var i: u16 = 0;
while (i < len) : (i += 1) {
const value = ptr[0];
zlib.out.appendAssumeCapacity(value);
ptr += 1;
}
}
}
}
}
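// Base values and extra-bit counts for length codes 257-285 and distance codes 0-29, from the tables in RFC 1951 section 3.2.5
// (some tables carry trailing zero entries as padding for the reserved codes).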
const len_base = [_]u16 {
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0,
};
const len_extra_bits = [_]u5 {
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0,
};
const dist_base = [_]u32 {
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0,
};
const dist_extra_bits = [_]u5 {
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
};
fn decodeHuffman(zlib: *Zlib, huffman: *const Huffman) !u16 {
zlib.refillBits();
// We shouldn't run out of data in the context of decoding a code. That's because after blocks end there is a 32 bit checksum.
if (zlib.remaining_bits_count < 24) {
return error.InvalidZlib;
}
const fast_value: u16 = huffman.fast_lut[zlib.remaining_bits & Huffman.fast_mask];
// Fast path (lookup table).
if (fast_value != 0) {
var bit_count = @intCast(u5, fast_value >> 9);
zlib.remaining_bits >>= bit_count;
zlib.remaining_bits_count -= bit_count;
return fast_value & 0x01FF;
}
// Slow path.
const next_bits: u16 = @bitReverse(u16, @truncate(u16, zlib.remaining_bits));
var bit_count: u4 = Huffman.fast_bits + 1;
while (true) {
if (next_bits < huffman.max_code[bit_count]) {
break;
}
// Invalid code.
if (bit_count == 15) {
return error.InvalidZlib;
}
bit_count += 1;
}
const ordinal = (next_bits >> (~bit_count + 1)) - huffman.first_code[bit_count] + huffman.first_symbol[bit_count];
if (ordinal >= Huffman.symbol_count) {
return error.InvalidZlib;
}
// From stb_image.h: "was originally an assert, but report failure instead."
if (huffman.bit_count[ordinal] != bit_count) {
return error.InvalidZlib;
}
zlib.remaining_bits >>= bit_count;
zlib.remaining_bits_count -= bit_count;
return huffman.symbol[ordinal];
}