Skip to content

Instantly share code, notes, and snippets.

@505e06b2
Created September 4, 2022 18:43
Show Gist options
  • Save 505e06b2/1167def474f1b0d3dd02ef739f9a0a2d to your computer and use it in GitHub Desktop.
Save 505e06b2/1167def474f1b0d3dd02ef739f9a0a2d to your computer and use it in GitHub Desktop.
Lossy ASCII compression
"use strict";
/**
 * Mutable runtime switches for this module.
 * Set `settings.debug = true` to have the encode/decode steps log their
 * intermediate values to the console.
 */
export const settings = { debug: false };
/**
 * Left-pads a bit string with "0" until its length is a multiple of
 * `divisible_by`.
 *
 * @param {string} bit_string - String to pad (normally only "0"/"1" characters).
 * @param {number} divisible_by - Group size the resulting length must align to.
 * @returns {string} The input unchanged when already aligned, otherwise the
 *   zero-prefixed string.
 */
function padBits(bit_string, divisible_by) {
    const remainder = bit_string.length % divisible_by;
    // 0 means already aligned; NaN (e.g. a divisor of 0) means there is nothing
    // sensible to align to - in both cases the string is returned untouched.
    if (remainder === 0 || Number.isNaN(remainder)) {
        return bit_string;
    }
    const missing = divisible_by - remainder;
    return bit_string.padStart(bit_string.length + missing, "0");
}
// Output alphabet: a symbol's position in this string is the numeric value it encodes.
const encoding_values =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";

// Number of bits needed to represent the highest index of the output alphabet.
const encoded_char_binary_size = (encoding_values.length - 1).toString(2).length;
/**
 * Converts a bit string into characters of the `encoding_values` alphabet.
 *
 * The bit string is left-padded with zeros to a multiple of
 * `encoded_char_binary_size`, split into groups of that size, and each group
 * is replaced by the alphabet character at that numeric index.
 *
 * @param {string} binary_string - String of "0"/"1" characters.
 * @returns {string} The encoded text; "" when `binary_string` is empty.
 */
function encode(binary_string) {
    const padded = padBits(binary_string, encoded_char_binary_size);
    if (settings.debug) console.log("Padded for Encode:", padded);

    // String.prototype.match yields null (not an empty array) when nothing
    // matches, which is what happens for an empty bit string; fall back to []
    // so the loop below simply produces "".
    const groups = padded.match(new RegExp(`.{${encoded_char_binary_size}}`, "g")) || [];

    let encoded = "";
    for (const bits of groups) {
        encoded += encoding_values[parseInt(bits, 2)];
    }
    return encoded;
}
/**
 * Turns text written in the `encoding_values` alphabet back into a bit string
 * and strips the zero padding that was prepended before encoding.
 *
 * @param {string} encoded_string - Text made of `encoding_values` characters.
 * @returns {string|undefined} The recovered bit string, or `undefined` (after
 *   logging an error) when a character is not part of the alphabet.
 */
function decode(encoded_string) {
    const groups = [];
    for (const symbol of encoded_string) {
        const value = encoding_values.indexOf(symbol);
        if (value === -1) {
            console.error(`"${symbol}" is not in the encoding scheme`);
            return;
        }
        groups.push(padBits(value.toString(2), encoded_char_binary_size));
    }
    const bits = groups.join("");

    // Leftover bits that do not fill a whole subset symbol must be padding.
    const surplus = bits.length % subset_char_binary_size;
    if (surplus) {
        return bits.slice(surplus);
    }

    // Length fits both group sizes: padding, if any, was exactly one whole
    // all-zero subset symbol at the front, so drop it when present.
    const zero_symbol = "0".repeat(subset_char_binary_size);
    return bits.startsWith(zero_symbol) ? bits.slice(subset_char_binary_size) : bits;
}
// Input alphabet that survives compression: a character's position in this
// string is the value stored for it. Index 0 (the newline) is all zero bits.
const ascii_subset = "\nabcdefghijklmnopqrstuvwxyz,.!? ";

// Number of bits needed to represent the highest index of the input alphabet.
const subset_char_binary_size = (ascii_subset.length - 1).toString(2).length;
/**
 * Builds a random string out of `ascii_subset` characters (testing helper).
 *
 * Leading newlines are removed from the result, so the returned string can be
 * shorter than `count`.
 *
 * @param {number} count - Number of random characters to draw.
 * @returns {string} The random string without leading newlines.
 */
export const _getRandomStr = (count) => { //for testing
    let x = "";
    for (let i = 0; i < count; i++) {
        // Math.floor, not parseInt: parseInt stringifies its argument first, so
        // a tiny float such as 5e-7 would be read as "5e-7" and parsed as 5.
        x += ascii_subset[Math.floor(Math.random() * ascii_subset.length)];
    }
    return x.replace(/^\n+/, "");
};
/**
 * Lossily compresses text: the input is lower-cased, every character found in
 * `ascii_subset` is stored as a fixed-width bit group, and the resulting bit
 * string is handed to `encode`.
 *
 * Characters outside `ascii_subset` are skipped with a console warning.
 * Leading newlines are dropped with a console warning, because the decoder
 * cannot tell a leading all-zero symbol apart from padding.
 *
 * @param {string} input_string - Text to compress.
 * @returns {string} The encoded text; "" when nothing encodable remains.
 */
export function compress(input_string) {
    //trimming is needed as if the first byte is 0, it could be mistaken for padding during encoding (newlines are the least harmful to remove)
    const text = input_string.replace(/^\n+/, "");
    let warned_trim = false;
    if (text !== input_string) {
        console.warn("Trimmed leading newlines");
        warned_trim = true;
    }
    if (settings.debug) console.log("Input String:", text);
    if (!text) return "";

    let binary_string = "";
    for (const c of text.toLowerCase()) {
        const index = ascii_subset.indexOf(c);
        if (index === -1) {
            console.warn(`Skipping "${c}" as not in subset`);
            continue;
        }
        // A zero symbol can still end up first when skipped characters precede
        // it (e.g. "1\nabc"); drop it here too so the first stored symbol is
        // never all-zero bits.
        if (index === 0 && binary_string === "") {
            if (!warned_trim) {
                console.warn("Trimmed leading newlines");
                warned_trim = true;
            }
            continue;
        }
        binary_string += padBits(index.toString(2), subset_char_binary_size);
    }
    if (settings.debug) console.log("Compressed:", binary_string);

    // Every character may have been skipped; there is nothing to encode then.
    if (!binary_string) return "";
    return encode(binary_string);
}
/**
 * Reverses `compress`: decodes the text back into a bit string, splits it into
 * groups of `subset_char_binary_size` bits and maps each group to its
 * `ascii_subset` character.
 *
 * @param {string} input_encoded - Text produced by `compress`.
 * @returns {string} The decompressed text; "" for empty input.
 * @throws {TypeError} When `input_encoded` contains a character that `decode`
 *   rejects.
 */
export function decompress(input_encoded) {
    if (!input_encoded) return "";

    const decoded = decode(input_encoded);
    // decode signals an unknown character by returning undefined; fail with a
    // clear message instead of crashing on `undefined.length` inside padBits.
    if (decoded === undefined) {
        throw new TypeError("Cannot decompress: input contains characters outside the encoding scheme");
    }

    const binary_string = padBits(decoded, subset_char_binary_size);
    if (settings.debug) console.log("Padded for Decode:", binary_string);

    // match returns null when there are no groups at all; treat that as empty.
    const binary_values = binary_string.match(new RegExp(`.{${subset_char_binary_size}}`, "g")) || [];
    let decompressed = "";
    for (const bits of binary_values) {
        decompressed += ascii_subset[parseInt(bits, 2)];
    }
    return decompressed;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment