Skip to content

Instantly share code, notes, and snippets.

@marvinborner
Last active May 25, 2023 15:58
Show Gist options
  • Save marvinborner/1a17c3d04a78a76aeb421054848377df to your computer and use it in GitHub Desktop.
Save marvinborner/1a17c3d04a78a76aeb421054848377df to your computer and use it in GitHub Desktop.
Experiment for finding a short BLC encoding for binary data
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
static void to_digits(unsigned char *bytes, int n_bytes, unsigned int *digits,
int n_digits, unsigned int base)
{
for (int i_byte = 0; i_byte < n_bytes; i_byte++) {
unsigned char byte = bytes[i_byte];
for (int mask = 0x80; mask; mask >>= 1) {
int bit = (byte & mask) != 0;
digits[0] = digits[0] * 2 + bit;
for (int i_digit = 1; i_digit < n_digits; i_digit++) {
digits[i_digit] *= 2;
if (digits[i_digit - 1] > (base - 1)) {
digits[i_digit - 1] -= base;
digits[i_digit]++;
}
}
}
}
}
// returns the maximum digits needed for encoding n bytes in a base
static size_t digits_needed(int n_bytes, int base)
{
return (size_t)ceil(log(pow(256, n_bytes)) / log(base));
}
static void write_bit(char val, FILE *file, char *byte, int *bit)
{
if (*bit > 7) { // flush byte
fwrite(byte, 1, 1, file);
*byte = 0;
*bit = 0;
}
if (val)
*byte |= 1UL << (7 - *bit);
(*bit)++;
}
int main(int argc, char *argv[])
{
if (argc != 2) {
fprintf(stderr, "Usage: %s <file>\n", argv[0]);
return 1;
}
FILE *in = fopen(argv[1], "rb");
if (!in) {
fprintf(stderr, "Could not open file\n");
return 1;
}
fseek(in, 0, SEEK_END);
size_t n_bytes = ftell(in);
fseek(in, 0, SEEK_SET);
fprintf(stderr, "n_bytes: %zu\n", n_bytes);
unsigned char *bytes = calloc(n_bytes, sizeof(*bytes));
fread(bytes, 1, n_bytes, in);
fclose(in);
int base = 8;
size_t n_digits = digits_needed(n_bytes, base);
fprintf(stderr, "n_digits: %zu\n", n_digits);
unsigned int *digits = calloc(n_digits, sizeof(*digits));
to_digits(bytes, n_bytes, digits, n_digits, base);
for (; n_digits > 0 && !digits[n_digits - 1]; n_digits--)
;
FILE *out = stdout;
char byte = 0;
int bit = 0;
for (size_t i = 0; i < n_digits; i++) {
write_bit(0, out, &byte, &bit);
for (unsigned int j = 0; j < digits[i] + 1; j++) {
write_bit(1, out, &byte, &bit);
}
}
if (bit) // flush final
fwrite(&byte, 1, 1, out);
return 0;
}
#!/bin/env python3
from random import randint
def to_base(n, b):
if n == 0:
return [0]
digits = []
while n:
digits.append(int(n % b))
n //= b
return digits[::-1]
def random_with_n_digits(n):
range_start = 10 ** (n - 1)
range_end = (10**n) - 1
return randint(range_start, range_end)
def gen_for_increasing():
N = 10**6
B = range(2, 10)
for b in B:
avg = 0
for n in range(1, N + 1):
out = 2 * b # abstractions (00)
base = to_base(n, b)
for e in base[:-1]:
out += 2 # application (01)
out += e + 2
out += base[-1] + 2
avg += out
print(b, avg / N)
def gen_for_fixed_big():
N = 1000000
I = 5
B = range(2, 10)
for b in B:
avg = 0
for i in range(I):
n = random_with_n_digits(N)
out = 2 * b # abstractions (00)
base = to_base(n, b)
for e in base[:-1]:
out += 2 # application (01)
out += e + 2
out += base[-1] + 2
avg += out
print(b, float(avg) / I)
@marvinborner
Copy link
Author

This program is part of my article here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment