Created
December 1, 2023 04:25
-
-
Save vijayanandvk/79621944201234ea3ca4f574df812bbe to your computer and use it in GitHub Desktop.
Node.js script that checks a list of URLs in bulk and reports any that return a 404 error
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
const axios = require('axios'); | |
const { Worker, isMainThread, parentPort, workerData } = require('worker_threads'); | |
/**
 * Read a list of URLs from a text file, one URL per line.
 *
 * Lines are split on LF or CRLF, surrounding whitespace is trimmed from each
 * line, and blank lines are dropped, so the returned entries can be passed
 * to an HTTP client as-is.
 *
 * If the file cannot be read, the error message is logged and the process
 * exits with status 1.
 *
 * @param {string} filePath - Path to the text file containing the URLs.
 * @returns {Promise<string[]>} The non-empty, trimmed lines of the file.
 */
const readFile = async (filePath) => {
  try {
    const data = await fs.promises.readFile(filePath, 'utf-8');
    return data
      .split(/\r?\n/) // accept both Unix and Windows line endings
      .map((line) => line.trim()) // return the trimmed value, not just test it
      .filter((line) => line !== '');
  } catch (error) {
    console.log(`Error reading file: ${error.message}`);
    process.exit(1);
  }
};
/**
 * Check a batch of URLs one after another and log the outcome of each.
 *
 * For every URL a GET request is issued through axios and exactly one line
 * is logged, prefixed with the worker ID:
 *   - "404 Error"          when the server answered with status 404,
 *   - "OK"                 when the request resolved with any other status,
 *   - "Error checking URL" for every other failure.
 *
 * @param {string[]} urls - URLs to request, in order.
 * @param {number|string} workerId - Identifier included in every log line.
 * @returns {Promise<void>} Resolves once every URL has been checked.
 */
const checkUrlsBatch = async (urls, workerId) => {
  for (const url of urls) {
    try {
      const response = await axios.get(url);
      if (response.status === 404) {
        console.log(`Worker ${workerId} - 404 Error: ${url}`);
      } else {
        console.log(`Worker ${workerId} - OK: ${url}`);
      }
    } catch (error) {
      // With axios' default validateStatus, non-2xx responses reject the
      // promise; the HTTP status is then found on error.response. Without
      // this check a 404 would be reported as a generic error.
      const status = error && error.response ? error.response.status : undefined;
      if (status === 404) {
        console.log(`Worker ${workerId} - 404 Error: ${url}`);
      } else {
        console.log(`Worker ${workerId} - Error checking URL: ${url}`);
      }
    }
  }
};
if (isMainThread) {
  // Main thread: read the URL list, split it into contiguous batches and
  // hand each batch to its own worker thread running this same file.
  const filePath = 'urls.txt'; // Replace with the path to your text file containing URLs

  readFile(filePath)
    .then((urls) => {
      const numThreads = 500; // Upper bound on the number of workers; adjust as needed
      const batchSize = Math.ceil(urls.length / numThreads);

      // Only spawn workers that will actually receive URLs. With fewer URLs
      // than numThreads (or when rounding up the batch size leaves the tail
      // empty) the remaining workers would start with an empty list.
      const workerCount = urls.length === 0 ? 0 : Math.ceil(urls.length / batchSize);

      for (let i = 0; i < workerCount; i++) {
        const startIdx = i * batchSize;
        const endIdx = startIdx + batchSize;
        const workerUrls = urls.slice(startIdx, endIdx);

        const worker = new Worker(__filename, {
          workerData: { urls: workerUrls, workerId: i },
        });

        worker.on('error', (error) => {
          console.log(`Worker ${i} encountered an error: ${error.message}`);
        });
      }
    })
    .catch((error) => {
      console.log(`Error reading file: ${error.message}`);
    });
} else {
  // Worker thread: check the batch that the main thread passed via workerData.
  const { urls, workerId } = workerData;
  checkUrlsBatch(urls, workerId);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment