Skip to content

Instantly share code, notes, and snippets.

@bqqbarbhg
Last active August 7, 2023 04:12
Show Gist options
  • Save bqqbarbhg/dfb26b88f9fa642e493737d9c8298ce2 to your computer and use it in GitHub Desktop.
Save bqqbarbhg/dfb26b88f9fa642e493737d9c8298ce2 to your computer and use it in GitHub Desktop.
#define _CRT_SECURE_NO_WARNINGS
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#include "stb_image_resize.h"
#define ArraySize(arr) (sizeof(arr)/sizeof(*(arr)))
// Signed modifier tables used by the EAC encoder/decoder below: 16 tables of
// 8 entries each (four negative steps followed by four non-negative steps).
// EacInitTables() multiplies every entry by the expanded multiplier to build
// the per-(table, multiplier) lookup used while decoding.
const int8_t etc2_alpha_modifiers[][8] = {
{ -3, -6, -9, -15, 2, 5, 8, 14, },
{ -3, -7, -10, -13, 2, 6, 9, 12, },
{ -2, -5, -8, -13, 1, 4, 7, 12, },
{ -2, -4, -6, -13, 1, 3, 5, 12, },
{ -3, -6, -8, -12, 2, 5, 7, 11, },
{ -3, -7, -9, -11, 2, 6, 8, 10, },
{ -4, -7, -8, -11, 3, 6, 7, 10, },
{ -3, -5, -8, -11, 2, 4, 7, 10, },
{ -2, -6, -8, -10, 1, 5, 7, 9, },
{ -2, -5, -8, -10, 1, 4, 7, 9, },
{ -2, -4, -8, -10, 1, 3, 7, 9, },
{ -2, -5, -7, -10, 1, 4, 6, 9, },
{ -3, -4, -7, -10, 2, 3, 6, 9, },
{ -1, -2, -3, -10, 0, 1, 2, 9, },
{ -4, -6, -8, -9, 3, 5, 7, 8, },
{ -3, -5, -7, -9, 2, 4, 6, 8, },
};
// Clamp a signed value into the unsigned 11-bit range [0, 2047].
//
// Declared `static inline`: in C99/C11 a plain `inline` definition does not
// emit an external definition, so any call the compiler decides not to inline
// (e.g. unoptimized builds) would fail to link.
static inline int32_t ClampU11(int32_t value)
{
    if (value < 0) return 0;
    if (value > 2047) return 2047;
    return value;
}
// Return the smaller of two unsigned 32-bit values.
//
// `static inline` so the function always has a definition to link against;
// a plain C99 `inline` definition provides none when the call is not inlined.
static inline uint32_t MinU32(uint32_t a, uint32_t b)
{
    return a < b ? a : b;
}
// Return the larger of two unsigned 32-bit values.
//
// `static inline` so the function always has a definition to link against;
// a plain C99 `inline` definition provides none when the call is not inlined.
static inline uint32_t MaxU32(uint32_t a, uint32_t b)
{
    return a < b ? b : a;
}
// Convert a 4-bit multiplier codeword into the factor applied to the modifier
// table: a codeword of 0 maps to 1, any other codeword maps to codeword * 8.
//
// `static inline` so the function always has a definition to link against;
// a plain C99 `inline` definition provides none when the call is not inlined.
static inline int32_t EacExpandMultiplier(int32_t multiplier)
{
    return multiplier ? multiplier * 8 : 1;
}
// Decoder state for one (base codeword, table, multiplier) combination,
// as produced by EacDecodeInit().
typedef struct eac_mode_dec {
int32_t base; // base_codeword * 8 + 4
const int16_t *table; // 8 pre-multiplied modifiers, points into an eac_tables
} eac_mode_dec;
// Precomputed modifier lookup filled by EacInitTables().
// Indexed as [table_index][multiplier][selector].
typedef struct eac_tables {
int16_t table_multiplier[16][16][8];
} eac_tables;
// Populate `tables` with every modifier of every table scaled by every
// expanded multiplier, so decoding reduces to one lookup plus an add.
void EacInitTables(eac_tables *tables)
{
    for (uint32_t t = 0; t < 16; t++) {
        for (uint32_t m = 0; m < 16; m++) {
            const int32_t scale = EacExpandMultiplier((int32_t)m);
            for (uint32_t k = 0; k < 8; k++) {
                const int32_t modifier = etc2_alpha_modifiers[t][k];
                tables->table_multiplier[t][m][k] = (int16_t)(modifier * scale);
            }
        }
    }
}
// Build the decoder state for one mode.
//   tables        - lookup filled by EacInitTables()
//   base_codeword - 8-bit base value; expanded to base_codeword * 8 + 4
//   table_index   - which modifier table to use (0..15)
//   multiplier    - multiplier codeword (0..15)
// The returned `table` pointer aliases `tables`, which must outlive the result.
//
// `static inline` so the function always has a definition to link against;
// a plain C99 `inline` definition provides none when the call is not inlined.
static inline eac_mode_dec EacDecodeInit(const eac_tables *tables, uint32_t base_codeword, uint32_t table_index, uint32_t multiplier)
{
    eac_mode_dec dec;
    dec.base = (int32_t)base_codeword * 8 + 4;
    dec.table = tables->table_multiplier[table_index][multiplier];
    return dec;
}
// Decode an EAC value, result in [0, 2047].
// `index` is the 3-bit selector (0..7) into the mode's modifier table.
//
// `static inline` so the function always has a definition to link against;
// a plain C99 `inline` definition provides none when the call is not inlined.
static inline int32_t EacDecodeU11(eac_mode_dec mode, int32_t index)
{
    return ClampU11(mode.base + mode.table[index]);
}
// Absolute value of a signed 32-bit integer.
// Note: INT32_MIN has no positive counterpart; callers must not pass it.
//
// `static inline` so the function always has a definition to link against;
// a plain C99 `inline` definition provides none when the call is not inlined.
static inline int32_t AbsI32(int32_t a)
{
    return a >= 0 ? a : -a;
}
// Choose, for each of the 16 values in `block_u11`, the selector whose decoded
// value under `mode` is closest (absolute difference).
//
// On entry *p_error is the error budget. If the accumulated error reaches the
// budget the search stops early, 0 is returned and *p_error is left untouched.
// Otherwise *p_error receives the summed error and the 48 selector bits are
// returned, with the first value stored in the most significant 3-bit slot.
uint64_t EacCompressFit(eac_mode_dec mode, const int32_t *block_u11, int32_t *p_error)
{
    const int32_t error_limit = *p_error;
    int32_t error_sum = 0;
    uint64_t selectors = 0;

    for (uint32_t pixel = 0; pixel < 16; pixel++) {
        const int32_t target = block_u11[pixel];
        uint32_t chosen = 0;
        int32_t chosen_err = INT32_MAX;

        for (uint32_t sel = 0; sel < 8; sel++) {
            const int32_t candidate_err = AbsI32(target - EacDecodeU11(mode, sel));
            if (candidate_err < chosen_err) {
                chosen_err = candidate_err;
                chosen = sel;
            }
        }

        selectors |= (uint64_t)chosen << ((15 - pixel) * 3);
        error_sum += chosen_err;
        if (error_sum >= error_limit) {
            return 0;
        }
    }

    *p_error = error_sum;
    return selectors;
}
// Measure how far an already-packed EAC block is from `block_u11`: decodes all
// 16 selectors of `packed` and returns the sum of absolute differences.
int32_t EacErrorToCompressed(const eac_tables *tables, uint64_t packed, const int32_t *block_u11)
{
    // Top 16 bits, high to low: 8-bit base, 4-bit multiplier, 4-bit table.
    const uint32_t header = (uint32_t)(packed >> 48);
    const uint32_t base = (header >> 8) & 0xff;
    const uint32_t mul = (header >> 4) & 0xf;
    const uint32_t table = header & 0xf;
    const eac_mode_dec mode = EacDecodeInit(tables, base, table, mul);

    int32_t sum = 0;
    int pixel = 0;
    // Selectors are stored first-value-highest, 3 bits each, from bit 45 down.
    for (int shift = 45; shift >= 0; shift -= 3, pixel++) {
        const uint32_t selector = (uint32_t)(packed >> shift) & 0x7;
        sum += AbsI32(block_u11[pixel] - EacDecodeU11(mode, selector));
    }
    return sum;
}
// Encode 16 16-bit values as one EAC block by brute force: the base codeword
// is fixed to the midpoint of the block's min and max (top 8 bits), then every
// (table, multiplier) pair is tried and the lowest-error fit is kept.
//
// On entry *p_error is the error to beat; on return it holds the best error
// found (unchanged if nothing beat it, in which case 0 is returned).
uint64_t EacCompressSimple(const eac_tables *tables, const uint16_t *src, int32_t *p_error)
{
    int32_t block_u11[16];
    uint32_t lowest = UINT32_MAX;
    uint32_t highest = 0;
    for (uint32_t i = 0; i < 16; i++) {
        const uint32_t v = src[i];
        if (v < lowest) lowest = v;
        if (v > highest) highest = v;
        block_u11[i] = (int32_t)(v >> 5); // 16-bit -> 11-bit
    }

    const uint32_t base = ((lowest + highest) / 2) >> 8;
    const uint64_t base_bits = (uint64_t)base << 56;

    int32_t best_error = *p_error;
    uint64_t best_bits = 0;
    for (uint32_t t = 0; t < 16; t++) {
        for (uint32_t m = 0; m < 16; m++) {
            int32_t err = best_error;
            const uint64_t selectors =
                EacCompressFit(EacDecodeInit(tables, base, t, m), block_u11, &err);
            if (err >= best_error) {
                continue;
            }
            best_error = err;
            best_bits = selectors | base_bits | ((uint64_t)m << 52) | ((uint64_t)t << 48);
        }
    }

    *p_error = best_error;
    return best_bits;
}
// 16-bit-per-component image held in memory.
typedef struct image {
uint16_t *pixels; // malloc-compatible buffer, released by FreeImage()
uint32_t stride, channels; // stride: uint16_t elements per row (width * channels); channels: components per pixel
uint32_t width, height; // dimensions in pixels
} image;
// Fetch component `c` of the pixel at (x, y). Coordinates beyond the right or
// bottom edge are clamped to the last column/row, so callers may read past the
// image to pad partial blocks. The image must be non-empty.
uint16_t ImageGetU16(const image *img, uint32_t x, uint32_t y, uint32_t c)
{
    x = MinU32(x, img->width - 1);
    y = MinU32(y, img->height - 1);
    // `stride` already counts uint16_t elements per row (width * channels, as
    // set by LoadImage/PadImage/ResizeImage), so only the column offset is
    // scaled by the channel count. The previous (y * stride + x) * channels
    // form was only correct for single-channel images.
    return img->pixels[(size_t)y * img->stride + (size_t)x * img->channels + c];
}
void EacCompressBlock(const eac_tables *tables, void *dst, const image *img, uint32_t block_x, uint32_t block_y, uint32_t channel)
{
uint16_t block[16];
uint32_t base_x = block_x * 4;
uint32_t base_y = block_y * 4;
// ETC block layout is vertical so need to load transposed
for (uint32_t y = 0; y < 4; y++) {
for (uint32_t x = 0; x < 4; x++) {
block[x * 4 + y] = ImageGetU16(img, base_x + x, base_y + y, channel);
}
}
int32_t error = INT32_MAX;
uint64_t result = EacCompressSimple(tables, block, &error);
char *dst_p = (char*)dst;
for (uint32_t i = 0; i < 8; i++) {
dst_p[i] = (char)(result >> (56 - i * 8));
}
}
// qsort() comparator ordering uint16_t values ascending.
// Returns -1, 0 or 1.
int CompareU16(const void *va, const void *vb)
{
    const uint16_t lhs = *(const uint16_t*)va;
    const uint16_t rhs = *(const uint16_t*)vb;
    return (lhs > rhs) - (lhs < rhs);
}
// Fit 16 16-bit values to a BC4 block with the given endpoints.
//   lo, hi - endpoint candidates in 16-bit scale (only the high byte is stored)
//   swap   - true: endpoint 0 = lo, 4 interpolated values plus 0 and 0xffff
//            false: endpoint 0 = hi, 6 interpolated values
// On entry *p_error is the error budget; if the summed absolute error reaches
// it, 0 is returned and *p_error is left untouched. Otherwise *p_error gets
// the summed error and the packed block is returned
// (byte 0 = endpoint 0, byte 1 = endpoint 1, then 48 selector bits).
uint64_t BC4CompressFit(const uint16_t *src, int32_t lo, int32_t hi, bool swap, int32_t *p_error)
{
    // Endpoints that share a high byte would store identical bytes; nudge one
    // of them by a full byte step so they differ.
    if ((hi >> 8) == (lo >> 8)) {
        if (hi < 0x8000) {
            hi += 0x100;
        } else {
            lo -= 0x100;
        }
    }

    int32_t palette[8];
    if (swap) {
        palette[0] = lo;
        palette[1] = hi;
        for (int32_t k = 1; k <= 4; k++) {
            palette[1 + k] = ((5 - k) * lo + k * hi) / 5;
        }
        palette[6] = 0;
        palette[7] = 0xffff;
    } else {
        palette[0] = hi;
        palette[1] = lo;
        for (int32_t k = 1; k <= 6; k++) {
            palette[1 + k] = ((7 - k) * hi + k * lo) / 7;
        }
    }

    const int32_t error_limit = *p_error;
    int32_t error_sum = 0;
    uint64_t selectors = 0;
    for (uint32_t pixel = 0; pixel < 16; pixel++) {
        const int32_t target = src[pixel];
        uint32_t chosen = 0;
        int32_t chosen_err = INT32_MAX;
        for (uint32_t sel = 0; sel < 8; sel++) {
            const int32_t candidate_err = AbsI32(target - palette[sel]);
            if (candidate_err < chosen_err) {
                chosen_err = candidate_err;
                chosen = sel;
            }
        }
        selectors |= (uint64_t)chosen << (pixel * 3);
        error_sum += chosen_err;
        if (error_sum >= error_limit) {
            return 0;
        }
    }

    const uint32_t endpoint0 = palette[0] >> 8;
    const uint32_t endpoint1 = palette[1] >> 8;
    *p_error = error_sum;
    return (selectors << 16) | (endpoint1 << 8) | (endpoint0);
}
// Encode 16 16-bit values as one BC4 block by exhaustive endpoint search:
// every pair (lower value, higher value) from the sorted block is tried as
// endpoints, in both interpolation modes, keeping the lowest-error result.
//
// On entry *p_error is the error to beat; on return it holds the best error
// found (unchanged if nothing beat it, in which case 0 is returned).
uint64_t BC4CompressSimple(const uint16_t *src, int32_t *p_error)
{
    uint16_t ordered[16];
    memcpy(ordered, src, sizeof(ordered));
    qsort(ordered, 16, sizeof(ordered[0]), &CompareU16);

    int32_t best_error = *p_error;
    uint64_t best_bits = 0;
    for (uint32_t lo_ix = 0; lo_ix < 15; lo_ix++) {
        // Walk the high endpoint from the largest value down to just above lo.
        for (uint32_t hi_ix = 15; hi_ix > lo_ix; hi_ix--) {
            const int32_t lo = ordered[lo_ix];
            const int32_t hi = ordered[hi_ix];
            for (int mode = 0; mode < 2; mode++) {
                int32_t err = best_error;
                const uint64_t candidate = BC4CompressFit(src, lo, hi, mode == 1, &err);
                if (err >= best_error) {
                    continue;
                }
                best_error = err;
                best_bits = candidate;
            }
        }
    }

    *p_error = best_error;
    return best_bits;
}
void BC4CompressBlock(void *dst, const image *img, uint32_t block_x, uint32_t block_y, uint32_t channel)
{
uint16_t block[16];
uint32_t base_x = block_x * 4;
uint32_t base_y = block_y * 4;
for (uint32_t y = 0; y < 4; y++) {
for (uint32_t x = 0; x < 4; x++) {
block[y * 4 + x] = ImageGetU16(img, base_x + x, base_y + y, channel);
}
}
int32_t error = INT32_MAX;
uint64_t result = BC4CompressSimple(block, &error);
char *dst_p = (char*)dst;
for (uint32_t i = 0; i < 8; i++) {
dst_p[i] = (char)(result >> (i * 8));
}
}
// Load an image file as 16-bit-per-component pixels via stb_image.
//   path         - file to load
//   req_channels - components per pixel to convert to, or 0 to keep the
//                  file's own channel count
// Returns an image whose `pixels` is NULL on failure. On success the caller
// owns the buffer and releases it with FreeImage().
image LoadImage(const char *path, uint32_t req_channels)
{
    image img = { NULL };
    int width = 0, height = 0, file_channels = 0;
    uint16_t *pixels = stbi_load_16(path, &width, &height, &file_channels, (int)req_channels);
    if (!pixels) return img;

    // stb_image reports the channel count of the FILE; when a specific count
    // was requested the returned buffer holds `req_channels` components per
    // pixel, and that is what the stride and indexing must be based on.
    uint32_t channels = req_channels != 0 ? req_channels : (uint32_t)file_channels;

    img.pixels = pixels;
    img.width = (uint32_t)width;
    img.height = (uint32_t)height;
    img.channels = channels;
    img.stride = (uint32_t)width * channels;
    return img;
}
// Remap every component in place as value * scale + bias, clamped to the
// 16-bit range [0, 65535] and truncated toward zero.
void CrunchImage(image *img, double scale, double bias)
{
    const uint32_t total = img->width * img->height * img->channels;
    uint16_t *p = img->pixels;
    for (uint32_t i = 0; i < total; i++, p++) {
        double mapped = (double)*p * scale + bias;
        if (mapped < 0.0) {
            mapped = 0.0;
        } else if (mapped > 65535.0) {
            mapped = 65535.0;
        }
        *p = (uint16_t)mapped;
    }
}
// Produce a resampled copy of `img` with the given dimensions and filter
// (edge mode clamp, sRGB colorspace, no alpha channel).
// Returns a newly allocated image the caller releases with FreeImage(); if the
// pixel buffer cannot be allocated the result has `pixels == NULL` and zero
// dimensions.
image ResizeImage(const image *img, uint32_t width, uint32_t height, stbir_filter filter)
{
    image res = { NULL };

    // Compute the element count in size_t: the product of three 32-bit values
    // can wrap in 32-bit arithmetic and under-allocate the buffer.
    size_t count = (size_t)width * height * img->channels;
    uint16_t *pixels = (uint16_t*)malloc(count * sizeof(uint16_t));
    if (!pixels) return res;

    res.pixels = pixels;
    res.width = width;
    res.height = height;
    res.channels = img->channels;
    res.stride = res.width * res.channels;
    stbir_resize_uint16_generic(
        img->pixels, (int)img->width, (int)img->height, (int)(img->stride * sizeof(uint16_t)),
        res.pixels, (int)res.width, (int)res.height, (int)(res.stride * sizeof(uint16_t)),
        (int)res.channels, STBIR_ALPHA_CHANNEL_NONE, 0, STBIR_EDGE_CLAMP, filter,
        STBIR_COLORSPACE_SRGB, NULL);
    return res;
}
// Copy `img` into a new width x height image. Pixels outside the source are
// filled by repeating its last column/row (ImageGetU16 clamps coordinates).
// Returns a newly allocated image the caller releases with FreeImage(); if the
// source is empty or the buffer cannot be allocated the result has
// `pixels == NULL` and zero dimensions.
image PadImage(const image *img, uint32_t width, uint32_t height)
{
    image res = { NULL };

    // ImageGetU16 clamps against width - 1 / height - 1, which is only
    // meaningful for a non-empty source.
    if (!img->pixels || img->width == 0 || img->height == 0) return res;

    uint32_t channels = img->channels;
    // size_t arithmetic: the 32-bit product can wrap and under-allocate.
    size_t count = (size_t)width * height * channels;
    uint16_t *pixels = (uint16_t*)malloc(count * sizeof(uint16_t));
    if (!pixels) return res;

    res.pixels = pixels;
    res.width = width;
    res.height = height;
    res.channels = channels;
    res.stride = res.width * res.channels;
    for (uint32_t y = 0; y < height; y++) {
        for (uint32_t x = 0; x < width; x++) {
            uint16_t *dst = res.pixels + (size_t)y * res.stride + (size_t)x * channels;
            for (uint32_t c = 0; c < channels; c++) {
                dst[c] = ImageGetU16(img, x, y, c);
            }
        }
    }
    return res;
}
// Release the pixel buffer and reset every field of `img` to zero, so the
// image can be safely freed again or tested for emptiness.
void FreeImage(image *img)
{
    const image cleared = { NULL };
    free(img->pixels);
    *img = cleared;
}
// Write `size` raw bytes from `data` to `f`. A zero size is a no-op, which
// lets callers pass a NULL/unused pointer for empty payloads.
void WriteData(FILE *f, const void *data, size_t size)
{
    if (size != 0) {
        fwrite(data, 1, size, f);
    }
}
// Write a 32-bit value to `f` as four bytes, least significant byte first.
void WriteU32(FILE *f, uint32_t v)
{
    const uint8_t le[4] = {
        (uint8_t)(v & 0xff),
        (uint8_t)((v >> 8) & 0xff),
        (uint8_t)((v >> 16) & 0xff),
        (uint8_t)((v >> 24) & 0xff),
    };
    WriteData(f, le, sizeof(le));
}
// One image payload for WriteKTX(), which emits `size` as a 32-bit value
// followed by `size` bytes from `data`.
typedef struct ktx_mip {
const uint8_t *data; // payload bytes (not owned by this struct)
uint32_t size; // payload length in bytes
} ktx_mip;
// One metadata entry for the KTX key/value section written by WriteKTX().
typedef struct ktx_key_value {
const char *key; // NUL-terminated key; the terminator is written to the file
const void *value; // raw value bytes
uint32_t value_size; // number of bytes at `value` to write
} ktx_key_value;
// KTX header fields. WriteKTX() emits the eleven 32-bit fields below in
// declaration order, right after the file identifier and endianness word,
// then serializes the key/value entries.
typedef struct ktx_header {
uint32_t gl_type; // glType
uint32_t gl_type_size; // glTypeSize
uint32_t gl_format; // glFormat
uint32_t gl_internal_format; // glInternalFormat
uint32_t gl_base_internal_format; // glBaseInternalFormat
uint32_t pixel_width; // pixelWidth
uint32_t pixel_height; // pixelHeight
uint32_t pixel_depth; // pixelDepth
uint32_t layer_count; // layerCount; WriteKTX treats 0 as 1 when counting images
uint32_t face_count; // faceCount
uint32_t level_count; // levelCount
const ktx_key_value *key_values; // metadata entries, may be unused when key_value_count is 0
uint32_t key_value_count; // number of entries in key_values
} ktx_header;
// Number of zero bytes needed to round `size` up to a multiple of four.
static uint32_t KtxPadTo4(uint32_t size)
{
    return (4 - size % 4) % 4;
}

// Serialize a KTX (version 1.1) file to `f`.
//   header   - header fields and key/value metadata
//   mips     - image payloads, one per (layer, face, level) combination
//   num_mips - number of entries in `mips`; must equal
//              max(layer_count, 1) * face_count * level_count
// Write errors are not reported here; they remain observable on the stream.
void WriteKTX(FILE *f, const ktx_header *header, const ktx_mip *mips, size_t num_mips)
{
    static const uint8_t magic[12] = {
        0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A,
    };
    static const uint8_t zero_pad[3] = { 0, 0, 0 };

    WriteData(f, magic, 12);
    WriteU32(f, 0x04030201); // endianness
    WriteU32(f, header->gl_type); // glType
    WriteU32(f, header->gl_type_size); // glTypeSize
    WriteU32(f, header->gl_format); // glFormat
    WriteU32(f, header->gl_internal_format); // glInternalFormat
    WriteU32(f, header->gl_base_internal_format); // glBaseInternalFormat
    WriteU32(f, header->pixel_width); // pixelWidth
    WriteU32(f, header->pixel_height); // pixelHeight
    WriteU32(f, header->pixel_depth); // pixelDepth
    WriteU32(f, header->layer_count); // layerCount
    WriteU32(f, header->face_count); // faceCount
    WriteU32(f, header->level_count); // levelCount

    // bytesOfKeyValueData covers, for every entry, the 4-byte
    // keyAndValueByteSize field itself plus the key/value bytes plus padding.
    // Leaving out the size field makes readers locate the image data 4 bytes
    // too early per entry.
    uint32_t key_value_size = 0;
    for (uint32_t i = 0; i < header->key_value_count; i++) {
        const ktx_key_value *kv = &header->key_values[i];
        uint32_t pair_size = (uint32_t)strlen(kv->key) + 1 + kv->value_size;
        key_value_size += (uint32_t)sizeof(uint32_t) + pair_size + KtxPadTo4(pair_size);
    }
    WriteU32(f, key_value_size); // bytesOfKeyValueData

    for (uint32_t i = 0; i < header->key_value_count; i++) {
        const ktx_key_value *kv = &header->key_values[i];
        uint32_t key_size = (uint32_t)strlen(kv->key) + 1;
        uint32_t pair_size = key_size + kv->value_size;
        WriteU32(f, pair_size); // keyAndValueByteSize (excludes padding)
        WriteData(f, kv->key, key_size);
        WriteData(f, kv->value, kv->value_size);
        WriteData(f, zero_pad, KtxPadTo4(pair_size));
    }

    assert(num_mips == MaxU32(header->layer_count, 1) * header->face_count * header->level_count);
    for (size_t i = 0; i < num_mips; i++) {
        WriteU32(f, mips[i].size); // imageSize
        WriteData(f, mips[i].data, mips[i].size);
        // Keep the next imageSize field 4-byte aligned (mipPadding). This is
        // a no-op for the 8-byte block formats written by this tool.
        WriteData(f, zero_pad, KtxPadTo4(mips[i].size));
    }
}
// Create (or overwrite) the file at `path` and write a KTX image into it.
// Returns false if the file cannot be opened or closing it fails.
bool SaveKTX(const char *path, const ktx_header *header, const ktx_mip *mips, size_t num_mips)
{
    FILE *out = fopen(path, "wb");
    if (out == NULL) {
        return false;
    }
    WriteKTX(out, header, mips, num_mips);
    const int close_status = fclose(out);
    return close_status == 0;
}
int main(int argc, char **argv)
{
uint32_t mip_count = 3;
image original_img = LoadImage(argv[1], 1);
uint32_t alignment = 4 << (mip_count - 1);
uint32_t original_width = original_img.width;
uint32_t original_height = original_img.height;
uint32_t top_width = (original_width + alignment - 1) & ~(alignment - 1);
uint32_t top_height = (original_height + alignment - 1) & ~(alignment - 1);
image top_img = PadImage(&original_img, top_width, top_height);
FreeImage(&original_img);
ktx_mip mips_eac[16];
ktx_mip mips_bc4[16];
eac_tables tables;
EacInitTables(&tables);
for (uint32_t mip_ix = 0; mip_ix < mip_count; mip_ix++) {
image img;
if (mip_ix == 0) {
img = top_img;
} else {
uint32_t src_width = ((top_width + (1 << mip_ix)/2) >> mip_ix);
uint32_t src_height = ((top_height + (1 << mip_ix)/2) >> mip_ix);
img = ResizeImage(&top_img, src_width, src_height, STBIR_FILTER_CATMULLROM);
}
uint32_t mip_width = top_width >> mip_ix;
uint32_t mip_height = top_height >> mip_ix;
assert(mip_width % 4 == 0);
assert(mip_height % 4 == 0);
uint32_t blocks_x = mip_width / 4;
uint32_t blocks_y = mip_height / 4;
uint32_t block_size = 8;
uint8_t *result_eac = calloc(block_size, blocks_x * blocks_y);
uint8_t *result_bc4 = calloc(block_size, blocks_x * blocks_y);
for (uint32_t y = 0; y < blocks_y; y++) {
for (uint32_t x = 0; x < blocks_x; x++) {
{
uint8_t *dst = result_eac + y*blocks_x*block_size + x*block_size;
EacCompressBlock(&tables, dst, &img, x, y, 0);
}
{
uint8_t *dst = result_bc4 + y*blocks_x*block_size + x*block_size;
BC4CompressBlock(dst, &img, x, y, 0);
}
}
printf("%u/%u\n", y+1, blocks_y);
}
mips_eac[mip_ix].data = result_eac;
mips_eac[mip_ix].size = blocks_x * blocks_y * block_size;
mips_bc4[mip_ix].data = result_bc4;
mips_bc4[mip_ix].size = blocks_x * blocks_y * block_size;
if (mip_ix > 0) {
FreeImage(&img);
}
}
FreeImage(&top_img);
ktx_header ktx_base = { 0 };
ktx_base.gl_type = 0;
ktx_base.gl_type_size = 1;
ktx_base.gl_format = 0;
ktx_base.pixel_width = top_width;
ktx_base.pixel_height = top_height;
ktx_base.pixel_depth = 0;
ktx_base.layer_count = 0;
ktx_base.face_count = 1;
ktx_base.level_count = mip_count;
char mango_json[512];
snprintf(mango_json, sizeof(mango_json),
"{ \"originalSize\": { \"x\": %u, \"y\": %u } }",
original_width, original_height);
ktx_key_value key_values[] = {
{ "mango:json", mango_json, strlen(mango_json) + 1 },
};
ktx_base.key_values = key_values;
ktx_base.key_value_count = ArraySize(key_values);
{
ktx_header ktx_head = ktx_base;
ktx_head.gl_internal_format = 0x9270; // COMPRESSED_R11_EAC
ktx_head.gl_base_internal_format = 0x1903; // GL_RED
SaveKTX("test_eac.ktx", &ktx_head, mips_eac, mip_count);
}
{
ktx_header ktx_head = ktx_base;
ktx_head.gl_internal_format = 0x8DBB; // GL_COMPRESSED_RED_RGTC1_EXT
ktx_head.gl_base_internal_format = 0x1903; // GL_RED
SaveKTX("test_bc4.ktx", &ktx_head, mips_bc4, mip_count);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment