Skip to content

Instantly share code, notes, and snippets.

@robertmassaioli
Created September 10, 2023 10:48
Show Gist options
  • Save robertmassaioli/68170816829af3b85c94cb129e9b8d68 to your computer and use it in GitHub Desktop.
Save robertmassaioli/68170816829af3b85c94cb129e9b8d68 to your computer and use it in GitHub Desktop.
Word Popularity Tool
const readline = require('readline');
const https = require('https');
// URL of the plain-text word list fetched by downloadWordList(). The file is
// split on newlines and a word's 1-based line position is used as its
// "popularity" rank (presumably most common first — verify against the
// upstream repository).
const wordListURL = 'https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english-no-swears.txt';
/**
 * Downloads the word list from `wordListURL`.
 *
 * @returns {Promise<string[]>} Resolves with the words of the list, one entry
 *   per non-empty line, in the order they appear in the file.
 *   Rejects when the request fails, the response stream errors, or the server
 *   answers with a status other than 200.
 */
async function downloadWordList() {
  return new Promise((resolve, reject) => {
    https.get(wordListURL, (response) => {
      if (response.statusCode !== 200) {
        // Discard the body so the connection is released, and fail instead of
        // treating an error page as if it were the word list.
        response.resume();
        reject(new Error(`Failed to download word list: HTTP ${response.statusCode}`));
        return;
      }

      // Decode at the stream level so a multi-byte character that straddles
      // two chunks is not corrupted by per-chunk Buffer-to-string conversion.
      response.setEncoding('utf8');

      let data = '';
      response.on('data', (chunk) => {
        data += chunk;
      });
      response.on('error', reject);
      response.on('end', () => {
        // Drop blank lines (notably the one produced by the file's trailing
        // newline) so the empty string is never treated as a ranked word.
        const words = data
          .split(/\r?\n/)
          .map((entry) => entry.trim())
          .filter((entry) => entry !== '');
        resolve(words);
      });
    }).on('error', reject);
  });
}
/**
 * Reads text from standard input and reports how popular its words are.
 *
 * After downloading the word list, every input line is split on whitespace,
 * each token is lower-cased and stripped of non-letters, and occurrences are
 * counted. When the input ends, the function prints:
 *   1. the first ten entries of the word list,
 *   2. every unique input word with its frequency and popularity rank
 *      (1-based position in the word list, or 'unpopular' when absent),
 *   3. a histogram of word occurrences grouped into buckets of 100 ranks.
 *
 * @returns {Promise<void>} Resolves once the stdin handlers are registered;
 *   the report itself is printed later, when stdin closes. Rejects if the
 *   word list cannot be downloaded.
 */
async function processInput() {
  const wordList = await downloadWordList();

  // Build the word -> rank lookup once so each unique input word costs a
  // single Map lookup instead of two linear scans of the list. Only the first
  // occurrence is kept, matching what indexOf() would have returned.
  const rankByWord = new Map();
  wordList.forEach((listedWord, index) => {
    if (!rankByWord.has(listedWord)) {
      rankByWord.set(listedWord, index + 1);
    }
  });

  const wordFrequencyMap = new Map();
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
    terminal: false,
  });

  rl.on('line', (line) => {
    for (const word of line.split(/\s+/)) {
      const cleanedWord = word.toLowerCase().replace(/[^a-zA-Z]/g, '');
      // Blank lines and tokens made only of digits/punctuation clean down to
      // an empty string; they are not words and must not be counted.
      if (cleanedWord === '') {
        continue;
      }
      const frequency = (wordFrequencyMap.get(cleanedWord) || 0) + 1;
      wordFrequencyMap.set(cleanedWord, frequency);
    }
  });

  rl.on('close', () => {
    // One record per unique word: the word, its count, and its rank.
    const wordArray = [];
    for (const [word, frequency] of wordFrequencyMap.entries()) {
      const popularity = rankByWord.has(word) ? rankByWord.get(word) : 'unpopular';
      wordArray.push({ word, frequency, popularity });
    }

    // Most frequent words first.
    wordArray.sort((a, b) => b.frequency - a.frequency);

    // Only a ten-entry preview of the downloaded list is printed.
    console.log('Word List (Top 10,000 words):');
    for (let i = 0; i < Math.min(10, wordList.length); i++) {
      console.log(`${i + 1}. ${wordList[i]}`);
    }

    console.log('\nUnique Words\tFrequency\tPopularity');
    for (const { word, frequency, popularity } of wordArray) {
      console.log(`${word}\t\t${frequency}\t\t${popularity}`);
    }

    const histogram = createHistogram(wordArray);
    console.log('\nWord Popularity Histogram:');

    // Numeric buckets in ascending order, with the 'unpopular' bucket last.
    const sortedHistogram = new Map([...histogram.entries()].sort((a, b) => {
      if (a[0] === 'unpopular') {
        return 1;
      }
      if (b[0] === 'unpopular') {
        return -1;
      }
      return a[0] - b[0];
    }));

    // Total number of counted word occurrences, used for the percentages.
    const totalCount = [...sortedHistogram.values()].reduce((total, count) => total + count, 0);

    for (const [bucket, frequency] of sortedHistogram.entries()) {
      const percentage = ((frequency / totalCount) * 100).toFixed(2);
      // Bucket k covers ranks (k - 1) * 100 + 1 through k * 100.
      const start = bucket === 'unpopular' ? 'unpopular' : (bucket - 1) * 100 + 1;
      const end = bucket === 'unpopular' ? 'unpopular' : bucket * 100;
      console.log(`Bucket ${start}-${end}: ${frequency} (${percentage}%)`);
    }
  });
}
/**
 * Groups word occurrences into popularity buckets.
 *
 * Ranked words fall into bucket `Math.ceil(popularity / 100)`; words whose
 * popularity is the string 'unpopular' share a single 'unpopular' bucket.
 *
 * @param {Iterable<{popularity: (number|string), frequency: number}>} wordArray
 *   Word records to aggregate.
 * @returns {Map<(number|string), number>} Bucket key mapped to the sum of the
 *   frequencies of the words in that bucket, in first-seen order.
 */
function createHistogram(wordArray) {
  const totalsByBucket = new Map();

  for (const entry of wordArray) {
    const isRanked = entry.popularity !== 'unpopular';
    const key = isRanked ? Math.ceil(entry.popularity / 100) : 'unpopular';

    const updatedTotal = totalsByBucket.has(key)
      ? totalsByBucket.get(key) + entry.frequency
      : entry.frequency;
    totalsByBucket.set(key, updatedTotal);
  }

  return totalsByBucket;
}
// Start processing user input. The returned promise rejects when start-up
// fails (for example, the word list cannot be downloaded); report that and
// exit with a failing status instead of leaving the rejection unhandled.
processInput().catch((error) => {
  console.error('Word popularity tool failed:', error);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment