const std = @import("std");
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;
// PNG decoder based on stb_image.h implementation, including zlib decompressor. A bit more Zig-friendly than the original. Compared to
// stb_image.h:
// - only decodes from a slice (filename or callbacks not supported)
// - only decodes to 8-bit channels (any other bit depth is converted, no decoding to 16-bit or float samples)
// - requires an allocator (zig style)
// - doesn't support interlaced images
// - doesn't support constant transparent color (for RGB and grayscale, there can be a single color specified, which should be interpreted
// as transparent - this is ignored in this implementation; alpha channels in RGBA, grayscale with alpha and indexed colors should work)
// - doesn't support iPhone PNG at all
// - no vertical flip option
// - no stbi_info equivalent (get width, height and channel count without fully decoding)
// - the output channel count must be chosen explicitly, but there are more choices: you can get a one, two, three or four channel image
// (from any source color format), and for single channel output you can choose which input channel is copied
//
// Other stuff:
// - performance: idk
// - correctness: works for me™
// - code quality: I changed int sizes randomly until it compiled, but at least there are comments
// - I learned quite a lot while making this
// - special thanks to Sean Barrett and other contributors of stb_image.h
//
// License:
//
// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// For more information, please refer to <http://unlicense.org/>
pub const SourceChannels = enum {
r,
g,
b,
a,
rg,
rgb,
rgba,
};
fn channelCount(channels: SourceChannels) u3 {
return switch (channels) {
.r, .g, .b, .a => 1,
.rg => 2,
.rgb => 3,
.rgba => 4,
};
}
const Error = error{OutOfMemory, InvalidPng, NotImplemented};
const IHDR = struct {
width: u32,
height: u32,
bit_depth: u8,
color_type: ColorType,
compression_method: u8,
filter_method: u8,
interlace_method: Interlace,
};
const Color = struct {
r: u8,
g: u8,
b: u8,
a: u8,
};
const ColorType = enum(u8) {
grayscale = 0,
rgb = 2,
indexed = 3,
grayscale_alpha = 4,
rgba = 6,
_,
};
const Interlace = enum(u8) {
none = 0,
adam7 = 1,
_,
};
const Filter = enum(u8) {
none = 0,
sub = 1,
up = 2,
average = 3,
paeth = 4,
// Additional synthetic filters used to avoid needing a dummy row of zeros serving as the previous scanline. These are not valid filter types for a PNG.
average_first_row = 254,
paeth_first_row = 255,
_,
};
/// Every PNG file starts with the following sequence of bytes.
const png_signature = [_]u8{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A};
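/// Packs a 4-character chunk type name into a big-endian u32 so it can be matched against chunk types read from the file,
/// e.g. chunk("IHDR") == 0x49484452.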
fn chunk(comptime name: *const [4]u8) u32 {
return std.mem.readIntBig(u32, name);
}
/// Advances the given slice by the given number of bytes, effectively moving the beginning of the slice but leaving the end unchanged.
/// Returns an error if the slice is shorter than the given amount.
fn advance(slicePtr: *[]const u8, amount: usize) !void {
const slice = slicePtr.*;
if (slice.len < amount) {
return error.OutOfRange;
}
slicePtr.* = slice[amount .. slice.len];
}
/// Advances the given slice by the given number of bytes, effectively moving the beginning of the slice but leaving the end unchanged.
/// Asserts that the slice is at least as long as the given amount.
fn advanceUnchecked(slicePtr: *[]const u8, amount: usize) void {
const slice = slicePtr.*;
slicePtr.* = slice[amount .. slice.len];
}
/// Reads an integer from the beginning of a given slice and advances it past the integer. Integer is assumed to be in big-endian order.
/// Returns error if the slice is too short to contain the integer. The bit count of the integer must be divisible by 8.
fn readIntBigAdvance(comptime T: type, slicePtr: *[]const u8) !T {
const int_byte_len = @divExact(@typeInfo(T).Int.bits, 8);
if (slicePtr.len < int_byte_len) {
return error.OutOfRange;
}
const ret = std.mem.readIntBig(T, slicePtr.*[0..int_byte_len]);
advanceUnchecked(slicePtr, int_byte_len);
return ret;
}
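// Example usage (a sketch, not part of the original decoder; assumes a PNG embedded with @embedFile and an allocator obtained elsewhere):
//
// var width: u32 = undefined;
// var height: u32 = undefined;
// const pixels = try loadPng(allocator, @embedFile("image.png"), &width, &height, .rgba);
// defer allocator.free(pixels);
// // `pixels` now holds height rows of width tightly packed RGBA pixels (width * height * 4 bytes).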
pub fn loadPng(allocator: *Allocator, data: []const u8, widthPtr: *u32, heightPtr: *u32, channels: SourceChannels) Error![]u8 {
// Slice that starts as the full data and advances as the image is read.
var remaining_data = data;
// Check if data starts with PNG signature.
if (!std.mem.startsWith(u8, data, &png_signature)) {
return error.InvalidPng;
}
// Advance past the signature, which should be followed immediately by a series of chunks.
advance(&remaining_data, png_signature.len) catch return error.InvalidPng;
var at_first_chunk = true;
var ihdr: IHDR = undefined;
// Palette length of zero is used to indicate that no palette was defined (because a palette with zero entries is not allowed). The
// palette can contain from 1 to 256 entries.
var palette_length: u9 = 0;
var palette: [256]Color = undefined;
// For color types without an alpha channel (grayscale and RGB), a single constant color can optionally be provided, meaning that pixels
// of exactly that color should be treated as fully transparent. The flag has_transparent_color is set to true if such a color was
// provided, in which case it is stored in the transparent_color array. The array has 16-bit RGB components to accommodate the highest
// possible bit depth; however, when a lower bit depth is used, the values in this array use that same, lower bit depth. For the grayscale
// color type, there is a single grayscale value used as the transparent color, which is stored at every index of the array.
var has_transparent_color = false;
var transparent_color = [_]u16{0, 0, 0};
var compressed_data = ArrayList(u8).init(allocator);
defer compressed_data.deinit();
// Iterate over chunks until they end. We end the iteration after the last chunk (which should be the IEND chunk) or when we run out of
// data, in which case it is a format error.
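// Each chunk has the same outer layout: [4B] length: u32be, [4B] chunk_type: u32be, [length] chunk data, [4B] crc32: u32be.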
while (true) {
const chunk_length = readIntBigAdvance(u32, &remaining_data) catch return error.InvalidPng;
const chunk_type = readIntBigAdvance(u32, &remaining_data) catch return error.InvalidPng;
switch (chunk_type) {
// IHDR chunk structure:
// [4B] width: u32be
// [4B] height: u32be
// [1B] bit_depth: u8
// [1B] color_type: u8
// [1B] compression_method: u8
// [1B] filter_method: u8
// [1B] interlace_method: u8
chunk("IHDR") => {
// IHDR is required to be the first chunk.
if (!at_first_chunk) {
return error.InvalidPng;
}
at_first_chunk = false;
// IHDR has constant length of 13.
if (chunk_length != 13) {
return error.InvalidPng;
}
ihdr.width = readIntBigAdvance(u32, &remaining_data) catch return error.InvalidPng;
ihdr.height = readIntBigAdvance(u32, &remaining_data) catch return error.InvalidPng;
ihdr.bit_depth = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
ihdr.color_type = @intToEnum(ColorType, readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng);
ihdr.compression_method = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
ihdr.filter_method = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
ihdr.interlace_method = @intToEnum(Interlace, readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng);
// Zero is an invalid value for width and height.
if (ihdr.width == 0 or ihdr.height == 0) {
return error.InvalidPng;
}
// Bit depth is the number of bits per sample or per palette index. Valid values are 1, 2, 4, 8 and 16, although not all
// values are allowed for all color types. This is why we verify color type and bit depth at the same time.
switch (ihdr.color_type) {
// For grayscale, every pixel is a single grayscale sample. All valid bit depths are allowed.
.grayscale => if (ihdr.bit_depth != 1
and ihdr.bit_depth != 2
and ihdr.bit_depth != 4
and ihdr.bit_depth != 8
and ihdr.bit_depth != 16) return error.InvalidPng,
// For RGB, every pixel has three samples. Only bit depths 8 and 16 are allowed.
.rgb => if (ihdr.bit_depth != 8
and ihdr.bit_depth != 16) return error.InvalidPng,
// For indexed, every pixel is an index into palette. Indexed color type requires a PLTE chunk to appear. Only bit
// depths 1, 2, 4 and 8 are allowed.
.indexed => if (ihdr.bit_depth != 1
and ihdr.bit_depth != 2
and ihdr.bit_depth != 4
and ihdr.bit_depth != 8) return error.InvalidPng,
// For grayscale with alpha, every pixel has two samples. Only bit depths 8 and 16 are allowed.
.grayscale_alpha => if (ihdr.bit_depth != 8
and ihdr.bit_depth != 16) return error.InvalidPng,
// For RGBA, every pixel has four samples. Only bit depths 8 and 16 are allowed.
.rgba => if (ihdr.bit_depth != 8
and ihdr.bit_depth != 16) return error.InvalidPng,
// Every other value for color type is invalid.
_ => return error.InvalidPng,
}
// Only compression method zero is allowed. The compression method zero means deflate/inflate compression with a sliding
// window of at most 32768 bytes.
if (ihdr.compression_method != 0) {
return error.InvalidPng;
}
// Only filter method zero is allowed. Filter method zero uses adaptive filtering with five basic filter types.
if (ihdr.filter_method != 0) {
return error.InvalidPng;
}
// All valid interlace methods are specified in Interlace enum (there are two). We check if we got an unspecified interlace
// method.
if (@enumToInt(ihdr.interlace_method) >= 2) {
return error.InvalidPng;
}
},
// PLTE chunk structure:
// [3B * N] color: [N]{
// [1B] red: u8
// [1B] green: u8
// [1B] blue: u8
// }
// Where N is the chunk length divided by 3 with no remainder allowed.
chunk("PLTE") => {
// IHDR is required to be the first chunk.
if (at_first_chunk) {
return error.InvalidPng;
}
// There can be only one PLTE chunk.
if (palette_length != 0) {
return error.InvalidPng;
}
// The number of palette entries is determined by the chunk length. A chunk length not divisible by 3 is an error.
palette_length = std.math.cast(u9, std.math.divExact(u32, chunk_length, 3) catch return error.InvalidPng) catch return error.InvalidPng;
// The number of palette entries should be at least 1 and no higher than can be represented using specified bit depth.
if (palette_length < 1 or palette_length > @as(u9, 1) << @intCast(u4, ihdr.bit_depth)) {
return error.InvalidPng;
}
// Note that we can have fewer entries than the bit depth would allow. In that case any out of range index is considered an
// error.
// PLTE chunk should not appear for grayscale and grayscale with alpha color types.
if (ihdr.color_type == .grayscale or ihdr.color_type == .grayscale_alpha) {
return error.InvalidPng;
}
// Note that the PLTE chunk is allowed for RGB and RGBA color types. Its purpose is to suggest a set of colors to which the
// image should be quantized, if it cannot be displayed with the full range of colors. That is not our concern, however we
// should remember not to return an error.
var i: usize = 0;
while (i < palette_length) : (i += 1) {
// Note that palette format color is always RGB with 8 bits per component.
palette[i].r = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
palette[i].g = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
palette[i].b = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
// Although PLTE doesn't contain alpha information, we store an alpha value of 255 to provide a default. The reason the palette has
// an alpha component at all is that alpha values can be provided separately, in the tRNS chunk.
palette[i].a = 255;
}
},
// tRNS chunk structure depends on color type:
// - For grayscale:
// [2B] gray: u16
// - For RGB:
// [2B] red: u16
// [2B] green: u16
// [2B] blue: u16
// - For indexed:
// [N * 1B] alpha: [N]u8
// Where N is the chunk length.
chunk("tRNS") => {
// IHDR is required to be the first chunk.
if (at_first_chunk) {
return error.InvalidPng;
}
// tRNS should appear before IDAT.
if (compressed_data.items.len > 0) {
return error.InvalidPng;
}
switch (ihdr.color_type) {
// For grayscale color type, tRNS chunk provides a single grayscale value which should be treated as transparent.
.grayscale => {
has_transparent_color = true;
// For grayscale, tRNS chunk should have exactly 2 bytes
if (chunk_length != 2) {
return error.InvalidPng;
}
// Note that while the value takes up 2 bytes, its bit depth matches the bit depth of the image.
const gray_transparent = readIntBigAdvance(u16, &remaining_data) catch return error.InvalidPng;
transparent_color[0] = gray_transparent;
transparent_color[1] = gray_transparent;
transparent_color[2] = gray_transparent;
},
// For RGB color type, tRNS chunk provides a single RGB color which should be treated as transparent.
.rgb => {
has_transparent_color = true;
// For RGB, tRNS chunk should have exactly 6 bytes
if (chunk_length != 6) {
return error.InvalidPng;
}
// Note that while the values take up 2 bytes each, their bit depth matches the bit depth of the image.
transparent_color[0] = readIntBigAdvance(u16, &remaining_data) catch return error.InvalidPng;
transparent_color[1] = readIntBigAdvance(u16, &remaining_data) catch return error.InvalidPng;
transparent_color[2] = readIntBigAdvance(u16, &remaining_data) catch return error.InvalidPng;
},
// For indexed color type, tRNS chunk provides alpha values for palette entries.
.indexed => {
// tRNS chunk should appear after PLTE.
if (palette_length == 0) {
return error.InvalidPng;
}
// The chunk length is also the number of alpha entries provided. There can be no more alpha entries than palette
// entries. However, there can be fewer, in which case the alpha value for the remaining entries should be 255. We
// already ensured that by setting the alpha value while reading the PLTE chunk.
if (chunk_length > palette_length) {
return error.InvalidPng;
}
var i: usize = 0;
while (i < chunk_length) : (i += 1) {
palette[i].a = readIntBigAdvance(u8, &remaining_data) catch return error.InvalidPng;
}
},
// tRNS chunk should not appear for grayscale with alpha and RGBA color types.
.grayscale_alpha, .rgba => return error.InvalidPng,
// Invalid color type should be rejected during IHDR chunk parsing.
_ => unreachable,
}
},
// The IDAT chunk contains the compressed image data. There can be many consecutive chunks, in which case the compressed
// datastream is a concatenation of the contents of all the IDAT chunks. Here we only copy the compressed data to decompress and
// decode it at the end.
chunk("IDAT") => {
// IHDR is required to be the first chunk.
if (at_first_chunk) {
return error.InvalidPng;
}
// The palette has to be specified before IDAT, and for the indexed color type it is mandatory.
if (ihdr.color_type == .indexed and palette_length == 0) {
return error.InvalidPng;
}
// IDAT chunks are appended to compressed_data to be later decompressed.
if (remaining_data.len < chunk_length) {
return error.InvalidPng;
}
try compressed_data.appendSlice(remaining_data[0..chunk_length]);
advanceUnchecked(&remaining_data, chunk_length);
},
chunk("IEND") => {
// IHDR is required to be the first chunk.
if (at_first_chunk) {
return error.InvalidPng;
}
// IEND chunk is empty.
if (chunk_length != 0) {
return error.InvalidPng;
}
// There should be at least one IDAT before.
if (compressed_data.items.len == 0) {
return error.InvalidPng;
}
// Make an initial guess for the size of the decompressed data to avoid reallocations.
const samples_per_pixel: u3 = switch (ihdr.color_type) {
.grayscale => 1,
.rgb => 3,
.indexed => 1,
.grayscale_alpha => 2,
.rgba => 4,
_ => unreachable,
};
// Distance between scanlines. One scanline contains 1 byte for filtering type followed by tightly packed samples.
const stride = 1 + (ihdr.width * ihdr.bit_depth * samples_per_pixel + 7) / 8;
const bytes_total = ihdr.height * stride;
var decompressed_data = try ArrayList(u8).initCapacity(allocator, bytes_total);
defer decompressed_data.deinit();
decompressData(compressed_data.items, &decompressed_data) catch |err| switch (err) {
error.InvalidZlib => return error.InvalidPng,
error.OutOfMemory => return error.OutOfMemory,
};
// Not enough data.
if (decompressed_data.items.len < bytes_total) {
return error.InvalidPng;
}
try defilter(&ihdr, decompressed_data.items);
// Convert decompressed data to desired output format.
const out_stride = ihdr.width * channelCount(channels);
const out = try allocator.alloc(u8, ihdr.height * out_stride);
errdefer allocator.free(out);
var y: usize = 0;
while (y < ihdr.height) : (y += 1) {
const scanline = decompressed_data.items[stride * y + 1 .. stride * (y + 1)];
const out_scanline = out[out_stride * y .. out_stride * (y + 1)];
var out_pixel = Color{
.r = undefined,
.g = undefined,
.b = undefined,
.a = 255,
};
// TODO use tRNS information (has_transparent_color and transparent_color), remember to compare transparent_color using
// source bit depth.
var x: usize = 0;
while (x < ihdr.width) : (x += 1) {
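// Sub-byte grayscale samples are expanded to 8 bits by bit replication (multiply by 0xFF, 0x55 or 0x11 for 1-, 2- and 4-bit samples),
// 16-bit samples keep only their high byte, and sub-byte indexed values are used directly as palette indices.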
switch (ihdr.color_type) {
.grayscale => switch (ihdr.bit_depth) {
1 => {
const shift = ~@truncate(u3, x);
const c = @as(u8, @truncate(u1, scanline[x / 8] >> shift)) * 0xFF;
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
},
2 => {
const shift = @as(u3, ~@truncate(u2, x)) << 1;
const c = @as(u8, @truncate(u2, scanline[x / 4] >> shift)) * 0x55;
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
},
4 => {
const shift = @as(u3, ~@truncate(u1, x)) << 2;
const c = @as(u8, @truncate(u4, scanline[x / 2] >> shift)) * 0x11;
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
},
8 => {
const c = scanline[x];
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
},
16 => {
const c = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[2 * x..]) >> 8);
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
},
else => unreachable,
},
.rgb => switch (ihdr.bit_depth) {
8 => {
out_pixel.r = scanline[3 * x];
out_pixel.g = scanline[3 * x + 1];
out_pixel.b = scanline[3 * x + 2];
},
16 => {
out_pixel.r = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[6 * x..]) >> 8);
out_pixel.g = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[6 * x + 2..]) >> 8);
out_pixel.b = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[6 * x + 4..]) >> 8);
},
else => unreachable,
},
.indexed => switch (ihdr.bit_depth) {
1 => {
const shift = ~@truncate(u3, x);
const ind = @as(u8, @truncate(u1, scanline[x / 8] >> shift));
if (ind >= palette_length) return error.InvalidPng;
out_pixel = palette[ind];
},
2 => {
const shift = @as(u3, ~@truncate(u2, x)) << 1;
const ind = @as(u8, @truncate(u2, scanline[x / 4] >> shift));
if (ind >= palette_length) return error.InvalidPng;
out_pixel = palette[ind];
},
4 => {
const shift = @as(u3, ~@truncate(u1, x)) << 2;
const ind = @as(u8, @truncate(u4, scanline[x / 2] >> shift));
if (ind >= palette_length) return error.InvalidPng;
out_pixel = palette[ind];
},
8 => {
const ind = scanline[x];
if (ind >= palette_length) return error.InvalidPng;
out_pixel = palette[ind];
},
else => unreachable,
},
.grayscale_alpha => switch (ihdr.bit_depth) {
8 => {
const c = scanline[2 * x];
const a = scanline[2 * x + 1];
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
out_pixel.a = a;
},
16 => {
const c = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[4 * x..]) >> 8);
const a = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[4 * x + 2..]) >> 8);
out_pixel.r = c;
out_pixel.g = c;
out_pixel.b = c;
out_pixel.a = a;
},
else => unreachable,
},
.rgba => switch (ihdr.bit_depth) {
8 => {
out_pixel.r = scanline[4 * x];
out_pixel.g = scanline[4 * x + 1];
out_pixel.b = scanline[4 * x + 2];
out_pixel.a = scanline[4 * x + 3];
},
16 => {
out_pixel.r = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[8 * x..]) >> 8);
out_pixel.g = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[8 * x + 2..]) >> 8);
out_pixel.b = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[8 * x + 4..]) >> 8);
out_pixel.a = @intCast(u8, std.mem.readIntSliceBig(u16, scanline[8 * x + 6..]) >> 8);
},
else => unreachable,
},
_ => unreachable,
}
switch (channels) {
.r => out_scanline[x] = out_pixel.r,
.g => out_scanline[x] = out_pixel.g,
.b => out_scanline[x] = out_pixel.b,
.a => out_scanline[x] = out_pixel.a,
.rg => {
out_scanline[2 * x] = out_pixel.r;
out_scanline[2 * x + 1] = out_pixel.g;
},
.rgb => {
out_scanline[3 * x] = out_pixel.r;
out_scanline[3 * x + 1] = out_pixel.g;
out_scanline[3 * x + 2] = out_pixel.b;
},
.rgba => {
out_scanline[4 * x] = out_pixel.r;
out_scanline[4 * x + 1] = out_pixel.g;
out_scanline[4 * x + 2] = out_pixel.b;
out_scanline[4 * x + 3] = out_pixel.a;
}
}
}
}
widthPtr.* = ihdr.width;
heightPtr.* = ihdr.height;
return out;
},
// Unrecognized chunk. We can skip it unless it's a critical chunk.
else => {
// IHDR is required to be the first chunk.
if (at_first_chunk) {
return error.InvalidPng;
}
// Bit 5 of the first byte of the chunk type (bit 29 of the big-endian u32) is the ancillary bit: if it is clear, the chunk is
// critical. We should reject any image with a critical chunk that we don't recognize.
if (chunk_type & (1 << 29) == 0) {
return error.NotImplemented;
}
// Skip over the data.
advance(&remaining_data, chunk_length) catch return error.InvalidPng;
}
}
// Every PNG chunk ends with a CRC-32 checksum, which is not included in the chunk length. We skip it without verifying.
advance(&remaining_data, 4) catch return error.InvalidPng;
}
}
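/// Reverses the per-scanline filtering of PNG filter method 0, in place. Each reconstructed byte is Recon(x) = Filt(x) + predictor
/// (mod 256, hence the +% operators), where the predictor is 0 (none), the corresponding byte of the pixel to the left (sub), the byte
/// above (up), the floored average of left and above (average), or the Paeth predictor of left, above and upper-left (paeth).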
fn defilter(ihdr: *IHDR, decompressed_data: []u8) !void {
if (ihdr.interlace_method != .none) {
return error.NotImplemented; // TODO
}
const samples_per_pixel: u3 = switch (ihdr.color_type) {
.grayscale => 1,
.rgb => 3,
.indexed => 1,
.grayscale_alpha => 2,
.rgba => 4,
_ => unreachable,
};
const bytes_per_scanline = (ihdr.width * ihdr.bit_depth * samples_per_pixel + 7) / 8;
const stride = 1 + bytes_per_scanline;
if (decompressed_data.len < ihdr.height * stride) {
return error.InvalidPng;
}
const input_channels: u3 = switch (ihdr.color_type) {
.grayscale => 1,
.rgb => 3,
.indexed => 1,
.grayscale_alpha => 2,
.rgba => 4,
_ => unreachable,
};
// Number of bytes we look back to get the "corresponding" byte of the pixel to the left.
const pixel_stride = input_channels * ((ihdr.bit_depth + 7) / 8);
var y: usize = 0;
while (y < ihdr.height) : (y += 1) {
const filter = @intToEnum(Filter, decompressed_data[stride * y]);
if (@enumToInt(filter) >= 5) {
return error.InvalidPng;
}
const scanline = decompressed_data[stride * y + 1 .. stride * (y + 1)];
const last_scanline = if (y == 0) decompressed_data[0..0] else decompressed_data[stride * (y - 1) + 1 .. stride * y];
const real_filter = if (y == 0) switch (filter) {
.up => .none,
.average => .average_first_row,
.paeth => .paeth_first_row,
else => filter,
} else filter;
// Handle first pixel.
var i: usize = 0;
switch (real_filter) {
.none => i = pixel_stride,
.sub => i = pixel_stride,
.up => while (i < pixel_stride) : (i += 1) {
scanline[i] = last_scanline[i];
},
.average => while (i < pixel_stride) : (i += 1) {
scanline[i] +%= last_scanline[i] >> 1;
},
.paeth => while (i < pixel_stride) : (i += 1) {
scanline[i] +%= paeth(0, last_scanline[i], 0);
},
.average_first_row => i = pixel_stride,
.paeth_first_row => i = pixel_stride,
_ => unreachable,
}
// Handle rest of the scanline.
switch (real_filter) {
.none => {},
.sub => while (i < bytes_per_scanline) : (i += 1) {
scanline[i] +%= scanline[i - pixel_stride];
},
.up => while (i < bytes_per_scanline) : (i += 1) {
scanline[i] +%= last_scanline[i];
},
.average => while (i < bytes_per_scanline) : (i += 1) {
// Widen to u9 before adding so the sum of two bytes cannot overflow.
scanline[i] +%= @intCast(u8, (@as(u9, scanline[i - pixel_stride]) + last_scanline[i]) >> 1);
},
.paeth => while (i < bytes_per_scanline) : (i += 1) {
scanline[i] +%= paeth(scanline[i - pixel_stride], last_scanline[i], last_scanline[i - pixel_stride]);
},
.average_first_row => while (i < bytes_per_scanline) : (i += 1) {
scanline[i] +%= scanline[i - pixel_stride] >> 1;
},
.paeth_first_row => while (i < bytes_per_scanline) : (i += 1) {
scanline[i] +%= paeth(scanline[i - pixel_stride], 0, 0);
},
_ => unreachable,
}
}
}
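/// Paeth predictor from the PNG spec: picks whichever of a (left), b (above) and c (upper-left) is closest to p = a + b - c, preferring
/// a, then b, then c on ties. Inputs are widened to i16 because p can leave the u8 range.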
fn paeth(a: i16, b: i16, c: i16) u8 {
const p = a + b - c;
const pa = std.math.absInt(p - a) catch unreachable;
const pb = std.math.absInt(p - b) catch unreachable;
const pc = std.math.absInt(p - c) catch unreachable;
if (pa <= pb and pa <= pc) return @intCast(u8, a);
if (pb <= pc) return @intCast(u8, b);
return @intCast(u8, c);
}
// --- ZLIB PORT ---------------------------------------------------------------
// Refer to RFC 1950 and 1951 if you want to have any idea what is happening here.
// TODO This entire section is overflow hell. Where can we trust the values that we have? Where could an overflow occur, and when are we
// sure it won't? Which int casts are safe?
const Zlib = struct {
data: []const u8,
remaining_data: []const u8,
// buffer for unpacking input data bit by bit
remaining_bits: u32 = 0,
remaining_bits_count: u6 = 0, // [0, 32]
out: *ArrayList(u8),
/// Discards bits to be byte-aligned.
pub fn alignToByte(self: *Zlib) void {
const to_discard = @truncate(u3, self.remaining_bits_count);
self.remaining_bits >>= to_discard;
self.remaining_bits_count -= to_discard;
}
/// Refills the internal bit buffer. Never fails; instead it won't refill fully when the data ends.
pub fn refillBits(self: *Zlib) void {
while (self.remaining_bits_count < 24 and self.remaining_data.len > 0) {
const byte = self.remaining_data[0];
advanceUnchecked(&self.remaining_data, 1);
self.remaining_bits |= @as(u32, byte) << @intCast(u5, self.remaining_bits_count);
self.remaining_bits_count += 8;
}
}
/// Get a runtime-known number of bits (up to 16).
pub fn getBits(self: *Zlib, bit_count: u5) !u16 {
std.debug.assert(bit_count <= 16);
if (self.remaining_bits_count < bit_count) {
self.refillBits();
// If we still have fewer bits than requested, it means we ran out of data.
if (self.remaining_bits_count < bit_count) {
return error.InvalidZlib;
}
}
const ret = @intCast(u16, self.remaining_bits & ((@as(u32, 1) << bit_count) - 1));
self.remaining_bits >>= bit_count;
self.remaining_bits_count -= bit_count;
return ret;
}
/// Get a comptime-known number of bits (up to 16).
pub fn getBitsCt(self: *Zlib, comptime T: type) !T {
const bit_count = @typeInfo(T).Int.bits;
// TODO We could detect wrong signedness and bit count at compile time to produce a compiler error.
return @intCast(T, try self.getBits(bit_count));
}
};
// symbol - decoded value (e.g. literal byte, length, distance), up to 9 bits
// code - code for value, variable bit length
// bit count - bit length of a code
// ordinal - index of a symbol if you stable sort the symbols by their bit count
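// Example (RFC 1951, section 3.2.2): symbols A-H with bit counts (3, 3, 3, 3, 3, 2, 4, 4) get the canonical codes
// A=010, B=011, C=100, D=101, E=110, F=00, G=1110, H=1111 - codes of the same length are consecutive and ordered by symbol.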
const Huffman = struct {
/// Use the next 9 bits as an index into this array. If the value is nonzero, there is a fast path to decode the next code: the symbol
/// value is in bits 0-8 and the higher bits contain the bit count. A code with a bit count higher than 9 won't be found in this table
/// and requires the slow path.
fast_lut: [1 << fast_bits]u16,
/// First code for a given bit count.
first_code: [16]u17,
/// Biggest code value for a given bit count.
max_code: [16]u17,
/// First ordinal for a given bit count.
first_symbol: [16]u17,
/// Bit count for a code that encodes given ordinal.
bit_count: [symbol_count]u8,
/// Symbol that corresponds to a given ordinal.
symbol: [symbol_count]u16,
const fast_bits = 9;
const fast_mask = (1 << fast_bits) - 1;
const symbol_count = 288;
/// Predefined bit counts for codes that encode symbols in joined value-length alphabet.
const predefined_val_len_bit_count = [_]u8 {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8,
};
/// Predefined bit counts for codes that encode symbols in distance alphabet.
const predefined_dist_bit_count = [_]u8 {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
/// Precomputed Huffman structure for predefined joined value-length alphabet.
const predefined_val_len_huffman: Huffman = blk: {
@setEvalBranchQuota(10000);
var huffman: Huffman = undefined;
huffman.build(&predefined_val_len_bit_count) catch unreachable;
break :blk huffman;
};
/// Precomputed Huffman structure for predefined distance alphabet.
const predefined_dist_huffman: Huffman = blk: {
@setEvalBranchQuota(10000);
var huffman: Huffman = undefined;
huffman.build(&predefined_dist_bit_count) catch unreachable;
break :blk huffman;
};
// RFC 1951 (page 8)
fn build(self: *Huffman, bit_count: []const u8) !void {
// Clear the fast LUT.
std.mem.set(u16, &self.fast_lut, 0);
// Count the number of codes for each bit length.
var code_count = [_]u16{0} ** 16;
for (bit_count) |bc| {
code_count[bc] += 1; // TODO Can we trust this value to be a small enough index?
}
// Codes that are never used receive bit count of zero. We must not assign them codes, so we pretend they don't exist.
code_count[0] = 0;
// There can be no more codes for a given bit count than the number of bits would allow.
for (code_count) |cc, i| {
if (cc > (@as(u16, 1) << @intCast(u4, i))) {
return error.InvalidZlib;
}
}
// Find the value of the smallest code for each bit count.
var next_code: [16]u17 = undefined;
var code: u17 = 0;
var symbol: u16 = 0;
var bc: u4 = 1;
while (true) {
next_code[bc] = code;
self.first_code[bc] = code;
self.first_symbol[bc] = symbol;
code += code_count[bc];
if (code_count[bc] != 0 and code - 1 >= @as(u17, 1) << bc) {
return error.InvalidZlib;
}
self.max_code[bc] = code << (~bc + 1); // preshift for inner loop
code <<= 1;
symbol += code_count[bc];
if (bc == 15) break;
bc += 1;
}
symbol = 0;
while (symbol < bit_count.len) : (symbol += 1) {
bc = @intCast(u4, bit_count[symbol]);
if (bc == 0) {
continue;
}
const ordinal = next_code[bc] - self.first_code[bc] + self.first_symbol[bc];
const fast_value = (@as(u16, bc) << 9) | symbol;
self.bit_count[ordinal] = bc;
self.symbol[ordinal] = symbol;
if (bc <= fast_bits) {
code = @bitReverse(u9, @intCast(u9, next_code[bc])) >> (9 - bc);
while (code < (1 << fast_bits)) : (code += @as(u16, 1) << bc) {
self.fast_lut[code] = fast_value;
}
}
next_code[bc] += 1;
}
}
};
pub fn decompressData(data: []const u8, out: *ArrayList(u8)) !void {
var zlib = Zlib{
.data = data,
.remaining_data = data,
.out = out,
};
// ZLIB structure:
// [1B] compression_method_and_flags: u8
// [1B] flags: u8
// if (preset_dictionary) [4B] preset_dictionary: u32
// [?] compressed_data
// [4B] adler32: u32
// Where preset_dictionary is a flag on bit 5 of flags.
// Parsing header.
const compression_method_and_flags = try zlib.getBitsCt(u8);
const compression_method = compression_method_and_flags & 0b0000_1111;
// Bits 4-7 in compression_method_and_flags contain compression info, from which we can get LZ77 window size. We don't use that
// information.
const flags = try zlib.getBitsCt(u8);
//const check_bits = flags & 0b0001_1111;
const preset_dictionary = flags & 0b0010_0000 != 0;
// Per spec, check_bits must be chosen so that compression_method_and_flags and flags, viewed as a 16-bit unsigned integer stored in
// MSB order (compression_method_and_flags * 256 + flags), form a multiple of 31.
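// For example, the common zlib header bytes 0x78 0x9C give 0x78 * 256 + 0x9C = 30876 = 31 * 996, so the check passes.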
if ((@as(u16, compression_method_and_flags) * 256 + flags) % 31 != 0) {
return error.InvalidZlib;
}
// When the zlib data format is used as part of another standard format, a compliant decompressor must support all the preset
// dictionaries specified by the other format. The other format (PNG in our case) specifies that a preset dictionary is not
// allowed.
if (preset_dictionary) {
return error.InvalidZlib;
}
// Compression method 8 is deflate, which is required by PNG.
if (compression_method != 8) {
return error.InvalidZlib;
}
// Parsing compressed data.
while (true) {
const final = (try zlib.getBitsCt(u1)) != 0;
const block_type = try zlib.getBitsCt(u2);
switch (block_type) {
// no compression
0 => try parseUncompressedBlock(&zlib),
// compressed with fixed Huffman codes
1 => {
try parseHuffmanBlock(
&zlib,
&Huffman.predefined_val_len_huffman,
&Huffman.predefined_dist_huffman
);
},
// compressed with dynamic Huffman codes
2 => {
var val_len_huffman: Huffman = undefined;
var dist_huffman: Huffman = undefined;
try computeHuffmanCodes(&zlib, &val_len_huffman, &dist_huffman);
try parseHuffmanBlock(&zlib, &val_len_huffman, &dist_huffman);
},
// reserved value (error)
3 => return error.InvalidZlib,
}
if (final) {
break;
}
}
}
fn parseUncompressedBlock(zlib: *Zlib) !void {
zlib.alignToByte();
zlib.refillBits();
if (zlib.remaining_bits_count < 32) {
// Assert that we actually ran out of data, not refilled to a lower amount because calling alignToByte didn't align.
std.debug.assert(zlib.remaining_bits_count & 7 == 0 and zlib.remaining_data.len == 0);
return error.InvalidZlib;
}
// Manually consume next 32 bits to read two u16 numbers.
const len = @byteSwap(u16, @intCast(u16, zlib.remaining_bits & 0xFFFF)); // block content length
const nlen = @byteSwap(u16, @intCast(u16, zlib.remaining_bits >> 16)); // one's complement (bit not) of len
zlib.remaining_bits = 0;
zlib.remaining_bits_count = 0;
if (nlen != ~len) {
return error.InvalidZlib;
}
if (zlib.remaining_data.len < len) {
return error.InvalidZlib;
}
try zlib.out.appendSlice(zlib.remaining_data[0..len]);
advanceUnchecked(&zlib.remaining_data, len);
}
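// The order in which bit counts for the internal (code length) alphabet are transmitted, per RFC 1951 section 3.2.7.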
const internal_symbol_order = [_]u8 {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
// RFC 1951 (page 13-14)
fn computeHuffmanCodes(zlib: *Zlib, val_len_huffman: *Huffman, dist_huffman: *Huffman) !void {
// Arrays of bit counts for joined value-length alphabet and for distance alphabet are themselves encoded using dynamic Huffman
// encoding, which we call here the internal Huffman.
var internal_huffman: Huffman = undefined;
// Number of value-length symbols in use, [257, 286]. Values above 286 are invalid and would overflow the bit_count array below.
const val_len_symbol_count = (try zlib.getBits(5)) + 257;
if (val_len_symbol_count > 286) return error.InvalidZlib;
// Number of distance symbols in use, [1, 32].
const dist_symbol_count = (try zlib.getBits(5)) + 1;
// Number of internal symbols in use, [4, 19].
const internal_symbol_count = (try zlib.getBits(4)) + 4;
// Reading bit counts for the internal Huffman encoding. Thankfully, these bit counts are not Huffman encoded.
var internal_bit_count = [_]u8{0} ** 19;
var i: u16 = 0;
while (i < internal_symbol_count) : (i += 1) {
const bc = try zlib.getBitsCt(u3);
internal_bit_count[internal_symbol_order[i]] = bc;
}
try internal_huffman.build(&internal_bit_count);
// Internal Huffman alphabet:
// 0-15: Literal bit count 0-15
// 16: Repeat the previous value (3 + value of next 2 bits) times
// 17: Repeat bit count 0 (3 + value of next 3 bits) times
// 18: Repeat bit count 0 (11 + value of next 7 bits) times
// Bit count table is shared by both alphabets, because repeats can cross the boundary between them.
var bit_count: [286 + 32]u8 = undefined;
const total_symbol_count = val_len_symbol_count + dist_symbol_count;
i = 0;
while (i < total_symbol_count) {
var symbol = @intCast(u8, try decodeHuffman(zlib, &internal_huffman));
switch (symbol) {
// literal
0 ... 15 => {
bit_count[i] = symbol;
i += 1;
},
// repeat previous
16 => {
if (i == 0) {
return error.InvalidZlib;
}
const len = (try zlib.getBits(2)) + 3;
if (i + len > total_symbol_count) return error.InvalidZlib;
std.mem.set(u8, bit_count[i .. i + len], bit_count[i - 1]);
i += len;
},
// multiple literal 0
17 => {
const len = (try zlib.getBits(3)) + 3;
if (i + len > total_symbol_count) return error.InvalidZlib;
std.mem.set(u8, bit_count[i .. i + len], 0);
i += len;
},
// multiple literal 0
18 => {
const len = (try zlib.getBits(7)) + 11;
if (i + len > total_symbol_count) return error.InvalidZlib;
std.mem.set(u8, bit_count[i .. i + len], 0);
i += len;
},
else => return error.InvalidZlib,
}
}
if (i != total_symbol_count) {
return error.InvalidZlib;
}
try val_len_huffman.build(bit_count[0..val_len_symbol_count]);
try dist_huffman.build(bit_count[val_len_symbol_count .. val_len_symbol_count + dist_symbol_count]);
}
fn parseHuffmanBlock(zlib: *Zlib, val_len_huffman: *const Huffman, dist_huffman: *const Huffman) !void {
while (true) {
const val_len_symbol: u16 = try decodeHuffman(zlib, val_len_huffman);
// byte literal
if (val_len_symbol < 256) {
try zlib.out.append(@intCast(u8, val_len_symbol));
// end-of-block code
} else if (val_len_symbol == 256) {
return;
// copy bytes from before
} else {
// read length
const len_ind = val_len_symbol - 257;
// Value-length symbols 286 and 287 are reserved and don't encode a valid length.
if (len_ind >= 29) return error.InvalidZlib;
const len: u16 = len_base[len_ind] + try zlib.getBits(len_extra_bits[len_ind]);
// read distance
const dist_symbol = try decodeHuffman(zlib, dist_huffman);
// Distance symbols 30 and 31 are reserved and would index past the end of dist_extra_bits.
if (dist_symbol >= 30) return error.InvalidZlib;
const dist = dist_base[dist_symbol] + try zlib.getBits(dist_extra_bits[dist_symbol]);
if (zlib.out.items.len < dist) {
return error.InvalidZlib;
}
var ptr = zlib.out.items.ptr + zlib.out.items.len - dist;
// Run of one byte, common in images. We can reuse the same value instead of moving ptr.
if (dist == 1) {
const value = ptr[0];
try zlib.out.appendNTimes(value, len);
} else {
try zlib.out.ensureUnusedCapacity(len);
var i: u16 = 0;
while (i < len) : (i += 1) {
const value = ptr[0];
zlib.out.appendAssumeCapacity(value);
ptr += 1;
}
}
}
}
}
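// Base values and extra-bit counts for length codes 257-285 and distance codes 0-29, from the tables in RFC 1951 section 3.2.5
// (some tables carry trailing zero entries as padding for the reserved codes).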
const len_base = [_]u16 {
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0,
};
const len_extra_bits = [_]u5 {
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0,
};
const dist_base = [_]u32 {
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0,
};
const dist_extra_bits = [_]u5 {
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
};
fn decodeHuffman(zlib: *Zlib, huffman: *const Huffman) !u16 {
zlib.refillBits();
// We shouldn't run out of data in the context of decoding a code. That's because after blocks end there is a 32 bit checksum.
if (zlib.remaining_bits_count < 24) {
return error.InvalidZlib;
}
const fast_value: u16 = huffman.fast_lut[zlib.remaining_bits & Huffman.fast_mask];
// Fast path (lookup table).
if (fast_value != 0) {
var bit_count = @intCast(u5, fast_value >> 9);
zlib.remaining_bits >>= bit_count;
zlib.remaining_bits_count -= bit_count;
return fast_value & 0x01FF;
}
// Slow path.
const next_bits: u16 = @bitReverse(u16, @truncate(u16, zlib.remaining_bits));
var bit_count: u4 = Huffman.fast_bits + 1;
while (true) {
if (next_bits < huffman.max_code[bit_count]) {
break;
}
// Invalid code.
if (bit_count == 15) {
return error.InvalidZlib;
}
bit_count += 1;
}
const ordinal = (next_bits >> (~bit_count + 1)) - huffman.first_code[bit_count] + huffman.first_symbol[bit_count];
if (ordinal >= Huffman.symbol_count) {
return error.InvalidZlib;
}
// From stb_image.h: "was originally an assert, but report failure instead."
if (huffman.bit_count[ordinal] != bit_count) {
return error.InvalidZlib;
}
zlib.remaining_bits >>= bit_count;
zlib.remaining_bits_count -= bit_count;
return huffman.symbol[ordinal];
}