Skip to content

Instantly share code, notes, and snippets.

@g023
Last active April 13, 2024 05:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save g023/20144fb6f443b7538db69e516660f3dc to your computer and use it in GitHub Desktop.
Save g023/20144fb6f443b7538db69e516660f3dc to your computer and use it in GitHub Desktop.
Making dictionaries with Javascript
<!-- /*
// author: https://github.com/g023
// license: BSD 3-Clause (https://opensource.org/license/BSD-3-clause)
An example to compress and decompress some text based on a given dictionary.
Makes a simpler array from the dictionary created from findTwoCharCombinations(text) and findThreeCharCombinations(text)
That should make compression even better.
TODO: compress the dictionary using the dictionary.
*/ -->
<script>
/**
 * Tally every three-character window of `text`.
 *
 * The text is wrapped in a single space on each side before scanning, so the
 * windows that overlap the start and the end of the text contain that space.
 *
 * @param {string} text - Text to scan.
 * @returns {{combination: string, count: number}[]} One entry per distinct
 *   window, ordered from most to least frequent.
 */
function findThreeCharCombinations(text) {
  const WINDOW = 3;
  const source = ` ${text} `;
  // Plain object on purpose: its key enumeration order decides how windows
  // with equal counts are ordered in the result.
  const tally = {};

  // `end` is the exclusive end index of the current window.
  for (let end = WINDOW; end <= source.length; end++) {
    const key = source.slice(end - WINDOW, end);
    tally[key] = (tally[key] || 0) + 1;
  }

  return Object.entries(tally)
    .map(([combination, count]) => ({ combination, count }))
    .sort((left, right) => right.count - left.count);
}
// begin compression related
/**
 * Fetch a text file from the server with a blocking (synchronous) request.
 *
 * NOTE(review): synchronous XMLHttpRequest on the main thread is deprecated in
 * browsers; it is kept here because the rest of this script consumes the
 * result immediately at top level.
 *
 * @param {string} filePath - URL or path of the file, resolved by the browser.
 * @returns {string|null} The response body when the HTTP status is 200,
 *   otherwise null.
 */
function loadFile(filePath) {
  const request = new XMLHttpRequest();
  // The third argument `false` makes the request synchronous, so `send()`
  // only returns once the response has arrived.
  request.open("GET", filePath, false);
  request.send();
  return request.status === 200 ? request.responseText : null;
}
// Load shake.txt (same directory on the server) BEFORE anything reads `text`:
// a `const` cannot be accessed ahead of its declaration.
const text = loadFile("shake.txt");
// Example usage (an inline sample can be used instead of the loaded file):
// const text = "This is a sample text with some three character combinations like 'Thi', 'his', 'isa', etc.";
const threeCharCombinations = findThreeCharCombinations(text);
console.log(threeCharCombinations);
/**
 * Reduce a frequency dictionary to just its combination strings.
 *
 * Compression and decompression only need the strings and their positions,
 * not the counts.
 *
 * @param {{combination: string}[]} dict - Entries as produced by the
 *   find*CharCombinations functions.
 * @returns {string[]} The `combination` of each entry, in the same order.
 */
function dictToArr(dict) {
  return Array.from(dict, (entry) => entry.combination);
}
/**
 * Encode `text` as an array of numbers referring to fixed-size chunks of a
 * dictionary.
 *
 * Output layout:
 *   [0]   number of space characters appended to the text so that its length
 *         is a multiple of the chunk size (0 when none were needed);
 *   [1]   dictionary index of the first chunk;
 *   [k]   (dictionary index of chunk k) minus the previously emitted element.
 * decompressText reverses this by adding each element to the one before it.
 *
 * Side effect: a chunk that is missing from `the_dict_arr` (typically the
 * space-padded tail of the text) is appended to `the_dict_arr` so that it can
 * still be decoded with the same array.
 *
 * @param {string} text - Text to compress.
 * @param {string[]} the_dict_arr - Dictionary of equally sized chunks; the
 *   length of its first entry is used as the chunk size.
 * @returns {number[]} The encoded representation described above.
 * @throws {TypeError} If the dictionary is not a non-empty array.
 * @throws {RangeError} If the first dictionary entry has no positive length.
 */
function compressText(text, the_dict_arr) {
  if (!Array.isArray(the_dict_arr) || the_dict_arr.length === 0) {
    throw new TypeError("compressText: the dictionary must be a non-empty array");
  }
  const chunkSize = the_dict_arr[0].length;
  if (!(chunkSize > 0)) {
    // A zero chunk size would never advance the loop below.
    throw new RangeError("compressText: dictionary entries must not be empty");
  }
  console.log("Chunk size: ", chunkSize);

  // Build the chunk -> index lookup once; keep the first index of duplicates,
  // which is what a linear search from the front would return.
  const indexByChunk = new Map();
  the_dict_arr.forEach((entry, index) => {
    if (!indexByChunk.has(entry)) {
      indexByChunk.set(entry, index);
    }
  });

  // Pad only when the text does not already end on a chunk boundary, and
  // record the number of characters actually added so the decoder can strip
  // exactly that many.
  const remainder = text.length % chunkSize;
  const padding = remainder === 0 ? 0 : chunkSize - remainder;
  const paddedText = text.padEnd(text.length + padding, ' ');

  const compressed = [padding];
  for (let offset = 0; offset < paddedText.length; offset += chunkSize) {
    const chunk = paddedText.substring(offset, offset + chunkSize);
    let index = indexByChunk.get(chunk);
    if (index === undefined) {
      // Unknown chunk: register it instead of emitting an undecodable -1.
      index = the_dict_arr.push(chunk) - 1;
      indexByChunk.set(chunk, index);
    }
    if (offset === 0) {
      compressed.push(index);
    } else {
      compressed.push(index - compressed[compressed.length - 1]);
    }
  }
  return compressed;
}
/**
 * Rebuild the text from the output of compressText.
 *
 * Expected input layout: element 0 is the number of padding characters to
 * strip from the end of the rebuilt text; element 1 is the dictionary index
 * of the first chunk; every later element, added to the element before it,
 * gives the dictionary index of the corresponding chunk.
 *
 * The input array is not modified, so it can be decoded again or inspected
 * afterwards.
 *
 * @param {number[]} compressed_text - Encoded data (padding header first).
 * @param {string[]} the_dict_arr - The dictionary used for compression.
 * @returns {string} The decoded text with the padding removed.
 * @throws {RangeError} If an element resolves to an index that has no string
 *   entry in the dictionary.
 */
function decompressText(compressed_text, the_dict_arr) {
  const chunkSize = the_dict_arr[0].length;
  console.log("Chunk size: ", chunkSize);

  // Split the header off without shifting the caller's array.
  const [padding = 0, ...codes] = compressed_text;

  const chunks = codes.map((code, position) => {
    const index = position === 0 ? code : code + codes[position - 1];
    const chunk = the_dict_arr[index];
    if (typeof chunk !== "string") {
      throw new RangeError(
        `decompressText: element ${position + 1} refers to dictionary index ${index}, which does not exist`
      );
    }
    return chunk;
  });

  const joined = chunks.join("");
  // Drop the padding characters that were appended during compression.
  return joined.substring(0, joined.length - padding);
}
// example: build a dictionary from the loaded text, then round-trip the text through it
const the_dict = findThreeCharCombinations(text);
const the_dict_arr = dictToArr(the_dict);
const compressed_text = compressText(text, the_dict_arr);
console.log("Compressed text: ", compressed_text);
// Decode from a copy so that `compressed_text` still holds every element
// (including the padding header) when its size is reported below.
const decompressed_text = decompressText([...compressed_text], the_dict_arr);
console.log("Decompressed text: ", decompressed_text);
// debug sizes (text sizes are in characters, the compressed size is in array elements)
console.log("Original text size: ", text.length);
console.log("Compressed text size: ", compressed_text.length);
console.log("Decompressed text size: ", decompressed_text.length);
// dict array size
console.log("Dict array size: ", the_dict_arr.length);
</script>
/*
// author: https://github.com/g023
// license: BSD 3-Clause (https://opensource.org/license/BSD-3-clause)
This function takes a string text as input and returns an array of objects containing the three-character combinations and their respective counts, ordered by frequency from most used to least used.
*/
/**
 * Count how often each three-character sequence occurs in `text`.
 *
 * One space is added before and after the text prior to scanning. Sequences
 * that contain whitespace are counted like any other.
 *
 * @param {string} text - Text to analyse.
 * @returns {{combination: string, count: number}[]} Distinct sequences with
 *   their counts, most frequent first.
 */
function findThreeCharCombinations(text) {
  const padded = ` ${text} `;
  const seen = {};

  let start = 0;
  while (start + 3 <= padded.length) {
    const trigram = padded.substring(start, start + 3);
    seen[trigram] = seen[trigram] ? seen[trigram] + 1 : 1;
    start += 1;
  }

  // Turn the tally into sortable records.
  const ranked = [];
  for (const trigram of Object.keys(seen)) {
    ranked.push({ combination: trigram, count: seen[trigram] });
  }
  // Highest count first.
  ranked.sort((x, y) => y.count - x.count);
  return ranked;
}
// Example usage: count the three-character combinations of a sample sentence
// and log the resulting array (sorted by descending count).
const text = "This is a sample text with some three character combinations like 'Thi', 'his', 'isa', etc.";
const threeCharCombinations = findThreeCharCombinations(text);
console.log(threeCharCombinations);
/*
// author: https://github.com/g023
// license: BSD 3-Clause (https://opensource.org/license/BSD-3-clause)
This function takes a string text as input and returns an array of objects containing the two-character combinations and their respective counts, ordered by frequency from most used to least used.
*/
/**
 * Tally every two-character window of `text`.
 *
 * The text is wrapped in a single space on each side before scanning.
 * Windows containing whitespace are counted like any other.
 *
 * @param {string} text - Text to scan.
 * @returns {{combination: string, count: number}[]} One entry per distinct
 *   window, ordered from most to least frequent.
 */
function findTwoCharCombinations(text) {
  const WINDOW = 2;
  const source = ` ${text} `;
  // Plain object on purpose: its key enumeration order decides how windows
  // with equal counts are ordered in the result.
  const tally = {};

  // `end` is the exclusive end index of the current window.
  for (let end = WINDOW; end <= source.length; end++) {
    const key = source.slice(end - WINDOW, end);
    tally[key] = (tally[key] || 0) + 1;
  }

  return Object.entries(tally)
    .map(([combination, count]) => ({ combination, count }))
    .sort((left, right) => right.count - left.count);
}
// Example usage: count the two-character combinations of a sample sentence
// and log the resulting array (sorted by descending count).
const text = "This is a sample text with some two character combinations like 'th', 'is', 'sa', etc.";
const twoCharCombinations = findTwoCharCombinations(text);
console.log(twoCharCombinations);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment