Last active
August 7, 2023 04:12
-
-
Save bqqbarbhg/dfb26b88f9fa642e493737d9c8298ce2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _CRT_SECURE_NO_WARNINGS
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#include "stb_image_resize.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Element count of a true array (not a pointer): total size over element size.
#define ArraySize(arr) (sizeof(arr) / sizeof((arr)[0]))
// Sixteen modifier tables of eight signed entries each, indexed as
// [table_index][pixel_index]. EacInitTables scales every row by each
// multiplier to build the decode lookup tables.
const int8_t etc2_alpha_modifiers[][8] = {
    /*  0 */ { -3, -6,  -9, -15, 2, 5, 8, 14 },
    /*  1 */ { -3, -7, -10, -13, 2, 6, 9, 12 },
    /*  2 */ { -2, -5,  -8, -13, 1, 4, 7, 12 },
    /*  3 */ { -2, -4,  -6, -13, 1, 3, 5, 12 },
    /*  4 */ { -3, -6,  -8, -12, 2, 5, 7, 11 },
    /*  5 */ { -3, -7,  -9, -11, 2, 6, 8, 10 },
    /*  6 */ { -4, -7,  -8, -11, 3, 6, 7, 10 },
    /*  7 */ { -3, -5,  -8, -11, 2, 4, 7, 10 },
    /*  8 */ { -2, -6,  -8, -10, 1, 5, 7,  9 },
    /*  9 */ { -2, -5,  -8, -10, 1, 4, 7,  9 },
    /* 10 */ { -2, -4,  -8, -10, 1, 3, 7,  9 },
    /* 11 */ { -2, -5,  -7, -10, 1, 4, 6,  9 },
    /* 12 */ { -3, -4,  -7, -10, 2, 3, 6,  9 },
    /* 13 */ { -1, -2,  -3, -10, 0, 1, 2,  9 },
    /* 14 */ { -4, -6,  -8,  -9, 3, 5, 7,  8 },
    /* 15 */ { -3, -5,  -7,  -9, 2, 4, 6,  8 },
};
// Clamp `value` to the unsigned 11-bit range [0, 2047].
//
// Declared `static inline`: a plain `inline` definition in C99/C11 emits no
// external definition, so a call the compiler chooses not to inline would
// fail to link.
static inline int32_t ClampU11(int32_t value)
{
    if (value < 0) {
        return 0;
    }
    if (value > 2047) {
        return 2047;
    }
    return value;
}
// Smaller of two unsigned 32-bit values.
//
// `static inline` so the function always has a definition to link against;
// a bare `inline` definition in C99/C11 does not provide one.
static inline uint32_t MinU32(uint32_t a, uint32_t b)
{
    return a < b ? a : b;
}
// Larger of two unsigned 32-bit values.
//
// `static inline` so the function always has a definition to link against;
// a bare `inline` definition in C99/C11 does not provide one.
static inline uint32_t MaxU32(uint32_t a, uint32_t b)
{
    return a < b ? b : a;
}
// Convert a 4-bit multiplier field into the factor applied to modifier table
// entries: a field of 0 maps to 1, any other field maps to 8 * field.
//
// `static inline` so the function always has a definition to link against;
// a bare `inline` definition in C99/C11 does not provide one.
static inline int32_t EacExpandMultiplier(int32_t multiplier)
{
    return multiplier ? multiplier * 8 : 1;
}
// Decoder state for one (base, table, multiplier) combination.
typedef struct eac_mode_dec {
    // Base value added to every table entry before clamping.
    int32_t base;
    // Eight pre-multiplied modifier values, indexed by the per-pixel selector.
    const int16_t *table;
} eac_mode_dec;
// Precomputed modifier tables, filled by EacInitTables.
typedef struct eac_tables {
    // Indexed as [table_index][multiplier][pixel_index].
    int16_t table_multiplier[16][16][8];
} eac_tables;
// Fill `tables` so that, for every table t, multiplier field m and entry i,
//   table_multiplier[t][m][i] == etc2_alpha_modifiers[t][i] * EacExpandMultiplier(m).
void EacInitTables(eac_tables *tables)
{
    for (uint32_t m = 0; m < 16; m++) {
        // The expanded factor depends only on the multiplier field.
        const int32_t factor = EacExpandMultiplier(m);
        for (uint32_t t = 0; t < 16; t++) {
            const int8_t *modifiers = etc2_alpha_modifiers[t];
            int16_t *scaled = tables->table_multiplier[t][m];
            for (uint32_t entry = 0; entry < 8; entry++) {
                scaled[entry] = (int16_t)(modifiers[entry] * factor);
            }
        }
    }
}
inline eac_mode_dec EacDecodeInit(const eac_tables *tables, uint32_t base_codeword, uint32_t table_index, uint32_t multiplier) | |
{ | |
eac_mode_dec dec; | |
dec.base = (int32_t)base_codeword * 8 + 4; | |
dec.table = tables->table_multiplier[table_index][multiplier]; | |
return dec; | |
} | |
// Decode an EAC value, result in [0, 2047].
//
// `index` is the per-pixel selector (0..7) into the mode's table; the table
// entry is added to the mode's base and the sum is clamped.
//
// `static inline` so the function always has a definition to link against;
// a bare `inline` definition in C99/C11 does not provide one.
static inline int32_t EacDecodeU11(eac_mode_dec mode, int32_t index)
{
    return ClampU11(mode.base + mode.table[index]);
}
// Absolute value of a 32-bit signed integer.
// The result is undefined for INT32_MIN, whose negation is not representable.
//
// `static inline` so the function always has a definition to link against;
// a bare `inline` definition in C99/C11 does not provide one.
static inline int32_t AbsI32(int32_t a)
{
    return a >= 0 ? a : -a;
}
// Choose, for each of the 16 pixels in `block_u11`, the selector whose decoded
// value under `mode` is closest to the pixel (absolute difference).
//
// On entry *p_error is the error budget. As soon as the accumulated error
// reaches or exceeds it the function returns 0 and leaves *p_error unchanged.
// Otherwise *p_error receives the total error and the 48 selector bits are
// returned, pixel 0 in the most significant 3-bit group.
uint64_t EacCompressFit(eac_mode_dec mode, const int32_t *block_u11, int32_t *p_error)
{
    const int32_t error_limit = *p_error;
    int32_t error_sum = 0;
    uint64_t selectors = 0;

    for (uint32_t pixel = 0; pixel < 16; pixel++) {
        const int32_t target = block_u11[pixel];
        int32_t lowest = INT32_MAX;
        uint32_t chosen = 0;

        // Strict '<' keeps the lowest selector on ties.
        for (uint32_t candidate = 0; candidate < 8; candidate++) {
            const int32_t distance = AbsI32(target - EacDecodeU11(mode, candidate));
            if (distance < lowest) {
                lowest = distance;
                chosen = candidate;
            }
        }

        error_sum += lowest;
        if (error_sum >= error_limit) {
            return 0;
        }
        selectors |= (uint64_t)chosen << (3 * (15 - pixel));
    }

    *p_error = error_sum;
    return selectors;
}
int32_t EacErrorToCompressed(const eac_tables *tables, uint64_t packed, const int32_t *block_u11) | |
{ | |
uint32_t base_codeword = (uint32_t)(packed >> 56) & 0xff; | |
uint32_t multiplier = (uint32_t)(packed >> 52) & 0xf; | |
uint32_t table_index = (uint32_t)(packed >> 48) & 0xf; | |
eac_mode_dec mode = EacDecodeInit(tables, base_codeword, table_index, multiplier); | |
int32_t total_err = 0; | |
for (uint32_t px = 0; px < 16; px++) { | |
int32_t ref = block_u11[px]; | |
uint32_t ti = (packed >> ((15 - px) * 3)) & 0x7; | |
total_err += AbsI32(ref - EacDecodeU11(mode, ti)); | |
} | |
return total_err; | |
} | |
uint64_t EacCompressSimple(const eac_tables *tables, const uint16_t *src, int32_t *p_error) | |
{ | |
int32_t block_u11[16]; | |
uint32_t min_v = UINT32_MAX, max_v = 0; | |
for (uint32_t px = 0; px < 16; px++) { | |
min_v = MinU32(min_v, src[px]); | |
max_v = MaxU32(max_v, src[px]); | |
block_u11[px] = (int32_t)(src[px] >> 5); | |
} | |
uint32_t base_codeword = (min_v + max_v) / 2 >> 8; | |
int32_t best_error = *p_error; | |
uint64_t best_bits = 0; | |
for (uint32_t table_index = 0; table_index < 16; table_index++) { | |
for (uint32_t multiplier = 0; multiplier < 16; multiplier++) { | |
eac_mode_dec mode = EacDecodeInit(tables, base_codeword, table_index, multiplier); | |
int32_t err = best_error; | |
uint64_t pixel_bits = EacCompressFit(mode, block_u11, &err); | |
if (err < best_error) { | |
best_error = err; | |
best_bits = pixel_bits | |
| (uint64_t)base_codeword << 56 | |
| (uint64_t)multiplier << 52 | |
| (uint64_t)table_index << 48; | |
} | |
} | |
} | |
*p_error = best_error; | |
return best_bits; | |
} | |
// A 16-bit-per-component image held in one heap buffer.
typedef struct image {
    // Pixel storage; released with free() by FreeImage.
    uint16_t *pixels;
    // Row pitch and number of components per pixel.
    uint32_t stride, channels;
    // Dimensions in pixels.
    uint32_t width, height;
} image;
uint16_t ImageGetU16(const image *img, uint32_t x, uint32_t y, uint32_t c) | |
{ | |
x = MinU32(x, img->width - 1); | |
y = MinU32(y, img->height - 1); | |
return img->pixels[(y * img->stride + x) * img->channels + c]; | |
} | |
void EacCompressBlock(const eac_tables *tables, void *dst, const image *img, uint32_t block_x, uint32_t block_y, uint32_t channel) | |
{ | |
uint16_t block[16]; | |
uint32_t base_x = block_x * 4; | |
uint32_t base_y = block_y * 4; | |
// ETC block layout is vertical so need to load transposed | |
for (uint32_t y = 0; y < 4; y++) { | |
for (uint32_t x = 0; x < 4; x++) { | |
block[x * 4 + y] = ImageGetU16(img, base_x + x, base_y + y, channel); | |
} | |
} | |
int32_t error = INT32_MAX; | |
uint64_t result = EacCompressSimple(tables, block, &error); | |
char *dst_p = (char*)dst; | |
for (uint32_t i = 0; i < 8; i++) { | |
dst_p[i] = (char)(result >> (56 - i * 8)); | |
} | |
} | |
// qsort comparator ordering uint16_t values ascending.
// Returns -1, 0 or 1.
int CompareU16(const void *va, const void *vb)
{
    const uint16_t lhs = *(const uint16_t*)va;
    const uint16_t rhs = *(const uint16_t*)vb;
    if (lhs < rhs) {
        return -1;
    }
    return lhs > rhs ? 1 : 0;
}
// Fit 16 samples (16 bits each) to the 8-entry palette defined by endpoints
// `lo` and `hi`, choosing for every pixel the entry with the smallest absolute
// difference.
//
//   swap == false: palette is hi, lo and six values interpolated in sevenths.
//   swap == true : palette is lo, hi, four values interpolated in fifths, plus
//                  the fixed extremes 0 and 0xffff.
//
// On entry *p_error is the error budget. When the accumulated error reaches or
// exceeds it the function returns 0 and leaves *p_error unchanged. Otherwise
// *p_error receives the total error and the packed block is returned: byte 0
// is the first endpoint's high byte, byte 1 the second endpoint's high byte,
// and bits 16..63 hold sixteen 3-bit selectors with pixel 0 lowest.
uint64_t BC4CompressFit(const uint16_t *src, int32_t lo, int32_t hi, bool swap, int32_t *p_error)
{
    // Only the high byte of each endpoint is stored, so force the two stored
    // bytes to differ by nudging one endpoint by one step.
    if ((hi >> 8) == (lo >> 8)) {
        if (hi < 0x8000) {
            hi += 0x100;
        } else {
            lo -= 0x100;
        }
    }

    int32_t palette[8];
    if (swap) {
        palette[0] = lo;
        palette[1] = hi;
        for (int32_t k = 1; k <= 4; k++) {
            palette[1 + k] = ((5 - k) * lo + k * hi) / 5;
        }
        palette[6] = 0;
        palette[7] = 0xffff;
    } else {
        palette[0] = hi;
        palette[1] = lo;
        for (int32_t k = 1; k <= 6; k++) {
            palette[1 + k] = ((7 - k) * hi + k * lo) / 7;
        }
    }

    const int32_t error_limit = *p_error;
    int32_t error_sum = 0;
    uint64_t selectors = 0;

    for (uint32_t pixel = 0; pixel < 16; pixel++) {
        const int32_t target = src[pixel];
        int32_t lowest = INT32_MAX;
        uint32_t chosen = 0;

        // Strict '<' keeps the lowest palette index on ties.
        for (uint32_t entry = 0; entry < 8; entry++) {
            const int32_t distance = AbsI32(target - palette[entry]);
            if (distance < lowest) {
                lowest = distance;
                chosen = entry;
            }
        }

        error_sum += lowest;
        if (error_sum >= error_limit) {
            return 0;
        }
        selectors |= (uint64_t)chosen << (pixel * 3);
    }

    const uint32_t first_byte = palette[0] >> 8;
    const uint32_t second_byte = palette[1] >> 8;
    *p_error = error_sum;
    return (selectors << 16) | (second_byte << 8) | (first_byte);
}
uint64_t BC4CompressSimple(const uint16_t *src, int32_t *p_error) | |
{ | |
uint16_t sorted[16]; | |
memcpy(sorted, src, 16 * sizeof(uint16_t)); | |
qsort(sorted, 16, sizeof(uint16_t), &CompareU16); | |
int32_t best_error = *p_error; | |
uint64_t best_bits = 0; | |
for (uint32_t drop_lo = 0; drop_lo < 15; drop_lo++) { | |
for (uint32_t drop_hi = 0; drop_hi < 15 - drop_lo; drop_hi++) { | |
int32_t lo = sorted[drop_lo]; | |
int32_t hi = sorted[15 - drop_hi]; | |
for (uint32_t swap = 0; swap < 2; swap++) { | |
int32_t err = best_error; | |
uint64_t bits = BC4CompressFit(src, lo, hi, swap != 0, &err); | |
if (err < best_error) { | |
best_error = err; | |
best_bits = bits; | |
} | |
} | |
} | |
} | |
*p_error = best_error; | |
return best_bits; | |
} | |
void BC4CompressBlock(void *dst, const image *img, uint32_t block_x, uint32_t block_y, uint32_t channel) | |
{ | |
uint16_t block[16]; | |
uint32_t base_x = block_x * 4; | |
uint32_t base_y = block_y * 4; | |
for (uint32_t y = 0; y < 4; y++) { | |
for (uint32_t x = 0; x < 4; x++) { | |
block[y * 4 + x] = ImageGetU16(img, base_x + x, base_y + y, channel); | |
} | |
} | |
int32_t error = INT32_MAX; | |
uint64_t result = BC4CompressSimple(block, &error); | |
char *dst_p = (char*)dst; | |
for (uint32_t i = 0; i < 8; i++) { | |
dst_p[i] = (char)(result >> (i * 8)); | |
} | |
} | |
image LoadImage(const char *path, uint32_t req_channels) | |
{ | |
image img = { NULL }; | |
int width, height, channels; | |
uint16_t *pixels = stbi_load_16(path, &width, &height, &channels, (int)req_channels); | |
if (!pixels) return img; | |
img.pixels = pixels; | |
img.width = width; | |
img.height = height; | |
img.stride = width * channels; | |
img.channels = channels; | |
return img; | |
} | |
// Apply `value * scale + bias` to every component in place, clamping the
// result to [0, 65535] before converting back to 16 bits.
void CrunchImage(image *img, double scale, double bias)
{
    const uint32_t total = img->width * img->height * img->channels;
    uint16_t *component = img->pixels;

    for (uint32_t remaining = total; remaining > 0; remaining--, component++) {
        const double mapped = (double)*component * scale + bias;
        const double clamped = mapped < 0.0 ? 0.0 : (mapped > 65535.0 ? 65535.0 : mapped);
        *component = (uint16_t)clamped;
    }
}
// Resample `img` to width x height with the given stb_image_resize filter,
// clamping at the edges.
//
// Returns a newly allocated image that the caller releases with FreeImage.
// If the allocation or the resize fails, the returned image has `pixels`
// NULL and all other fields zero.
image ResizeImage(const image *img, uint32_t width, uint32_t height, stbir_filter filter)
{
    image res = { NULL };

    // size_t arithmetic so large dimensions cannot wrap a 32-bit product.
    const size_t count = (size_t)width * height * img->channels;
    uint16_t *pixels = malloc(count * sizeof *pixels);
    if (!pixels) {
        return res;
    }

    res.pixels = pixels;
    res.width = width;
    res.height = height;
    res.channels = img->channels;
    res.stride = res.width * res.channels;

    // stb takes strides in bytes; `stride` is in uint16_t elements.
    const int ok = stbir_resize_uint16_generic(
        img->pixels, (int)img->width, (int)img->height, (int)(img->stride * sizeof(uint16_t)),
        res.pixels, (int)res.width, (int)res.height, (int)(res.stride * sizeof(uint16_t)),
        (int)res.channels, STBIR_ALPHA_CHANNEL_NONE, 0, STBIR_EDGE_CLAMP, filter,
        STBIR_COLORSPACE_SRGB, NULL);
    if (!ok) {
        free(pixels);
        memset(&res, 0, sizeof res);
    }
    return res;
}
image PadImage(const image *img, uint32_t width, uint32_t height) | |
{ | |
uint32_t channels = img->channels; | |
image res; | |
res.width = width; | |
res.height = height; | |
res.channels = channels; | |
res.stride = res.width * res.channels; | |
res.pixels = (uint16_t*)malloc(width * height * channels * sizeof(uint16_t)); | |
for (uint32_t y = 0; y < height; y++) { | |
for (uint32_t x = 0; x < width; x++) { | |
uint16_t *dst = res.pixels + y * res.stride + x * channels; | |
for (uint32_t c = 0; c < channels; c++) { | |
dst[c] = ImageGetU16(img, x, y, c); | |
} | |
} | |
} | |
return res; | |
} | |
// Release the pixel buffer and zero every field, so the image reads as empty
// and a second FreeImage call is harmless.
void FreeImage(image *img)
{
    uint16_t *owned = img->pixels;
    memset(img, 0, sizeof(*img));
    free(owned);
}
// Write `size` bytes from `data` to `f`; a zero-length write is skipped.
// The fwrite result is not inspected here; a failure remains visible through
// the stream's error indicator.
void WriteData(FILE *f, const void *data, size_t size)
{
    if (size != 0) {
        fwrite(data, 1, size, f);
    }
}
// Write a 32-bit value to `f` as four bytes, least significant first,
// independent of host byte order.
void WriteU32(FILE *f, uint32_t v)
{
    const uint8_t little_endian[4] = {
        (uint8_t)(v),
        (uint8_t)(v >> 8),
        (uint8_t)(v >> 16),
        (uint8_t)(v >> 24),
    };
    WriteData(f, little_endian, sizeof(little_endian));
}
// One image payload written to a KTX file by WriteKTX.
typedef struct ktx_mip {
    // Payload bytes.
    const uint8_t *data;
    // Payload length in bytes.
    uint32_t size;
} ktx_mip;
// One metadata entry written into the KTX key/value section.
typedef struct ktx_key_value {
    // NUL-terminated key; the terminator is written to the file.
    const char *key;
    // Value bytes, written verbatim.
    const void *value;
    // Number of value bytes.
    uint32_t value_size;
} ktx_key_value;
typedef struct ktx_header { | |
uint32_t gl_type; | |
uint32_t gl_type_size; | |
uint32_t gl_format; | |
uint32_t gl_internal_format; | |
uint32_t gl_base_internal_format; | |
uint32_t pixel_width; | |
uint32_t pixel_height; | |
uint32_t pixel_depth; | |
uint32_t layer_count; | |
uint32_t face_count; | |
uint32_t level_count; | |
const ktx_key_value *key_values; | |
uint32_t key_value_count; | |
} ktx_header; | |
void WriteKTX(FILE *f, const ktx_header *header, const ktx_mip *mips, size_t num_mips) | |
{ | |
const uint8_t magic[12] = { | |
0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A, | |
}; | |
WriteData(f, magic, 12); | |
WriteU32(f, 0x04030201); // endianness | |
WriteU32(f, header->gl_type); // glType | |
WriteU32(f, header->gl_type_size); // glTypeSize | |
WriteU32(f, header->gl_format); // glFormat | |
WriteU32(f, header->gl_internal_format); // glInternalFormat: COMPRESSED_R11_EAC | |
WriteU32(f, header->gl_base_internal_format); // glBaseInternalFormat: GL_RED | |
WriteU32(f, header->pixel_width); // pixelWidth | |
WriteU32(f, header->pixel_height); // pixelHeight | |
WriteU32(f, header->pixel_depth); // pixelDepth | |
WriteU32(f, header->layer_count); // layerCount | |
WriteU32(f, header->face_count); // faceCount | |
WriteU32(f, header->level_count); // levelCount | |
uint32_t key_value_size = 0; | |
for (uint32_t i = 0; i < header->key_value_count; i++) { | |
const ktx_key_value *kv = &header->key_values[i]; | |
uint32_t key_size = (uint32_t)strlen(kv->key) + 1; | |
key_value_size += key_size; | |
key_value_size += kv->value_size; | |
key_value_size += (4 - (key_size + kv->value_size) % 4) % 4; | |
} | |
WriteU32(f, key_value_size); | |
for (uint32_t i = 0; i < header->key_value_count; i++) { | |
const ktx_key_value *kv = &header->key_values[i]; | |
uint32_t key_size = (uint32_t)strlen(kv->key) + 1; | |
WriteU32(f, key_size + kv->value_size); | |
WriteData(f, kv->key, key_size); | |
WriteData(f, kv->value, kv->value_size); | |
WriteData(f, "\0\0\0", (4 - (key_size + kv->value_size) % 4) % 4); | |
} | |
assert(num_mips == MaxU32(header->layer_count, 1) * header->face_count * header->level_count); | |
for (uint32_t i = 0; i < num_mips; i++) { | |
WriteU32(f, mips[i].size); | |
WriteData(f, mips[i].data, mips[i].size); | |
} | |
} | |
// Write a KTX file to `path`.
//
// Returns true only if the file was opened, every write succeeded and the
// file was closed without error. WriteKTX does not report failed writes, so
// the stream's error indicator is checked before closing; without that a
// short write (for example on a full disk) would still be reported as success.
bool SaveKTX(const char *path, const ktx_header *header, const ktx_mip *mips, size_t num_mips)
{
    FILE *f = fopen(path, "wb");
    if (!f) {
        return false;
    }

    WriteKTX(f, header, mips, num_mips);

    bool ok = ferror(f) == 0;
    // fclose flushes buffered data, so its result matters as well.
    if (fclose(f) != 0) {
        ok = false;
    }
    return ok;
}
int main(int argc, char **argv) | |
{ | |
uint32_t mip_count = 3; | |
image original_img = LoadImage(argv[1], 1); | |
uint32_t alignment = 4 << (mip_count - 1); | |
uint32_t original_width = original_img.width; | |
uint32_t original_height = original_img.height; | |
uint32_t top_width = (original_width + alignment - 1) & ~(alignment - 1); | |
uint32_t top_height = (original_height + alignment - 1) & ~(alignment - 1); | |
image top_img = PadImage(&original_img, top_width, top_height); | |
FreeImage(&original_img); | |
ktx_mip mips_eac[16]; | |
ktx_mip mips_bc4[16]; | |
eac_tables tables; | |
EacInitTables(&tables); | |
for (uint32_t mip_ix = 0; mip_ix < mip_count; mip_ix++) { | |
image img; | |
if (mip_ix == 0) { | |
img = top_img; | |
} else { | |
uint32_t src_width = ((top_width + (1 << mip_ix)/2) >> mip_ix); | |
uint32_t src_height = ((top_height + (1 << mip_ix)/2) >> mip_ix); | |
img = ResizeImage(&top_img, src_width, src_height, STBIR_FILTER_CATMULLROM); | |
} | |
uint32_t mip_width = top_width >> mip_ix; | |
uint32_t mip_height = top_height >> mip_ix; | |
assert(mip_width % 4 == 0); | |
assert(mip_height % 4 == 0); | |
uint32_t blocks_x = mip_width / 4; | |
uint32_t blocks_y = mip_height / 4; | |
uint32_t block_size = 8; | |
uint8_t *result_eac = calloc(block_size, blocks_x * blocks_y); | |
uint8_t *result_bc4 = calloc(block_size, blocks_x * blocks_y); | |
for (uint32_t y = 0; y < blocks_y; y++) { | |
for (uint32_t x = 0; x < blocks_x; x++) { | |
{ | |
uint8_t *dst = result_eac + y*blocks_x*block_size + x*block_size; | |
EacCompressBlock(&tables, dst, &img, x, y, 0); | |
} | |
{ | |
uint8_t *dst = result_bc4 + y*blocks_x*block_size + x*block_size; | |
BC4CompressBlock(dst, &img, x, y, 0); | |
} | |
} | |
printf("%u/%u\n", y+1, blocks_y); | |
} | |
mips_eac[mip_ix].data = result_eac; | |
mips_eac[mip_ix].size = blocks_x * blocks_y * block_size; | |
mips_bc4[mip_ix].data = result_bc4; | |
mips_bc4[mip_ix].size = blocks_x * blocks_y * block_size; | |
if (mip_ix > 0) { | |
FreeImage(&img); | |
} | |
} | |
FreeImage(&top_img); | |
ktx_header ktx_base = { 0 }; | |
ktx_base.gl_type = 0; | |
ktx_base.gl_type_size = 1; | |
ktx_base.gl_format = 0; | |
ktx_base.pixel_width = top_width; | |
ktx_base.pixel_height = top_height; | |
ktx_base.pixel_depth = 0; | |
ktx_base.layer_count = 0; | |
ktx_base.face_count = 1; | |
ktx_base.level_count = mip_count; | |
char mango_json[512]; | |
snprintf(mango_json, sizeof(mango_json), | |
"{ \"originalSize\": { \"x\": %u, \"y\": %u } }", | |
original_width, original_height); | |
ktx_key_value key_values[] = { | |
{ "mango:json", mango_json, strlen(mango_json) + 1 }, | |
}; | |
ktx_base.key_values = key_values; | |
ktx_base.key_value_count = ArraySize(key_values); | |
{ | |
ktx_header ktx_head = ktx_base; | |
ktx_head.gl_internal_format = 0x9270; // COMPRESSED_R11_EAC | |
ktx_head.gl_base_internal_format = 0x1903; // GL_RED | |
SaveKTX("test_eac.ktx", &ktx_head, mips_eac, mip_count); | |
} | |
{ | |
ktx_header ktx_head = ktx_base; | |
ktx_head.gl_internal_format = 0x8DBB; // GL_COMPRESSED_RED_RGTC1_EXT | |
ktx_head.gl_base_internal_format = 0x1903; // GL_RED | |
SaveKTX("test_bc4.ktx", &ktx_head, mips_bc4, mip_count); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment