Created
September 10, 2023 10:48
-
-
Save robertmassaioli/68170816829af3b85c94cb129e9b8d68 to your computer and use it in GitHub Desktop.
Word Popularity Tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const readline = require('readline');
const https = require('https');

// Location of the plain-text word list, one word per line. The rest of this
// script treats a word's line position in that file as its popularity rank.
const wordListURL = 'https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english-no-swears.txt';
/**
 * Download the word list from `wordListURL` and split it into words.
 *
 * @returns {Promise<string[]>} Resolves with the non-blank lines of the file,
 *   trimmed, in file order.
 * @throws {Error} The promise rejects on a request/stream error or when the
 *   server answers with anything other than HTTP 200.
 */
function downloadWordList() {
  return new Promise((resolve, reject) => {
    https
      .get(wordListURL, (response) => {
        if (response.statusCode !== 200) {
          // Drain the body so the socket is released, then fail loudly instead
          // of parsing an error page as if it were the word list.
          response.resume();
          reject(new Error(`Failed to download word list: HTTP ${response.statusCode}`));
          return;
        }

        // Decode as UTF-8 at the stream level so a multi-byte character that
        // straddles two chunks is not corrupted by per-chunk string conversion.
        response.setEncoding('utf8');

        let data = '';
        response.on('data', (chunk) => {
          data += chunk;
        });
        response.on('end', () => {
          // Accept both LF and CRLF, and drop blank lines: the file's trailing
          // newline would otherwise add an empty "word" to the list.
          const words = data
            .split(/\r?\n/)
            .map((line) => line.trim())
            .filter((line) => line !== '');
          resolve(words);
        });
        response.on('error', reject);
      })
      .on('error', reject);
  });
}
/**
 * Read text from stdin, count how often each word occurs, and print a report.
 *
 * The report contains the first ten entries of the downloaded word list, a
 * table of every unique input word with its frequency and popularity rank
 * (its 1-based position in the word list, or 'unpopular' when absent), and a
 * histogram of occurrences grouped into popularity buckets of 100 ranks.
 *
 * The report is printed when stdin closes. The returned promise settles as
 * soon as the word list is downloaded and the stdin listeners are attached,
 * not when the report has been printed.
 *
 * @returns {Promise<void>}
 * @throws {Error} Rejects with whatever `downloadWordList` rejects with.
 */
async function processInput() {
  const wordList = await downloadWordList();

  // Build word -> 1-based rank once so each lookup is O(1) instead of scanning
  // the list per unique word. The first occurrence wins, matching what
  // `indexOf` would report for a duplicated entry.
  const rankByWord = new Map();
  wordList.forEach((listedWord, index) => {
    if (!rankByWord.has(listedWord)) {
      rankByWord.set(listedWord, index + 1);
    }
  });

  const wordFrequencyMap = new Map();
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
    terminal: false,
  });

  rl.on('line', (line) => {
    for (const word of line.split(/\s+/)) {
      const cleanedWord = word.toLowerCase().replace(/[^a-zA-Z]/g, '');
      // Blank lines, leading whitespace and punctuation-only tokens all clean
      // down to the empty string; that is not a word, so do not count it.
      if (cleanedWord === '') {
        continue;
      }
      const frequency = (wordFrequencyMap.get(cleanedWord) || 0) + 1;
      wordFrequencyMap.set(cleanedWord, frequency);
    }
  });

  rl.on('close', () => {
    // Create an array of word objects with word, frequency, and popularity
    const wordArray = [];
    for (const [word, frequency] of wordFrequencyMap.entries()) {
      const popularity = rankByWord.has(word) ? rankByWord.get(word) : 'unpopular';
      wordArray.push({ word, frequency, popularity });
    }

    // Sort the word array by frequency in descending order
    wordArray.sort((a, b) => b.frequency - a.frequency);

    console.log('Word List (Top 10,000 words):');
    for (let i = 0; i < Math.min(10, wordList.length); i++) {
      console.log(`${i + 1}. ${wordList[i]}`);
    }

    console.log('\nUnique Words\tFrequency\tPopularity');
    for (const { word, frequency, popularity } of wordArray) {
      console.log(`${word}\t\t${frequency}\t\t${popularity}`);
    }

    // Create and print the histogram
    const histogram = createHistogram(wordArray);
    console.log('\nWord Popularity Histogram:');

    // Order buckets numerically, with the 'unpopular' bucket always last.
    const sortedHistogram = new Map([...histogram.entries()].sort((a, b) => {
      if (a[0] === 'unpopular') {
        return 1;
      }
      if (b[0] === 'unpopular') {
        return -1;
      }
      return a[0] - b[0];
    }));

    // Total number of counted word occurrences, used as the percentage base.
    const totalCount = [...sortedHistogram.values()].reduce((total, count) => total + count, 0);

    for (const [bucket, frequency] of sortedHistogram.entries()) {
      const percentage = ((frequency / totalCount) * 100).toFixed(2);
      // Bucket k covers ranks (k - 1) * 100 + 1 through k * 100.
      const start = bucket === 'unpopular' ? 'unpopular' : (bucket - 1) * 100 + 1;
      const end = bucket === 'unpopular' ? 'unpopular' : bucket * 100;
      console.log(`Bucket ${start}-${end}: ${frequency} (${percentage}%)`);
    }
  });
}
/**
 * Group word occurrences into popularity buckets.
 *
 * A word with numeric popularity rank r lands in bucket `Math.ceil(r / bucketSize)`,
 * so with the default size bucket 1 holds ranks 1-100, bucket 2 holds 101-200,
 * and so on. Words whose popularity is the string 'unpopular' share a single
 * bucket keyed 'unpopular'.
 *
 * @param {Array<{popularity: (number|string), frequency: number}>} wordArray
 *   Entries to aggregate; only `popularity` and `frequency` are read.
 * @param {number} [bucketSize=100] Number of ranks per bucket (positive integer).
 * @returns {Map<(number|string), number>} Bucket key -> summed frequency, in
 *   the order buckets were first encountered.
 * @throws {RangeError} If `bucketSize` is not a positive integer.
 */
function createHistogram(wordArray, bucketSize = 100) {
  if (!Number.isInteger(bucketSize) || bucketSize < 1) {
    throw new RangeError(`bucketSize must be a positive integer, got ${bucketSize}`);
  }

  const histogram = new Map();
  for (const { popularity, frequency } of wordArray) {
    const bucket = popularity === 'unpopular' ? 'unpopular' : Math.ceil(popularity / bucketSize);
    histogram.set(bucket, (histogram.get(bucket) || 0) + frequency);
  }
  return histogram;
}
// Start processing user input. A failure (for example the word list download)
// is reported on stderr and turned into a non-zero exit code rather than being
// left as an unhandled promise rejection.
processInput().catch((error) => {
  const reason = error instanceof Error ? error.message : String(error);
  console.error(`Word popularity tool failed: ${reason}`);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment