Skip to content

Instantly share code, notes, and snippets.

@zrrrzzt
Created February 12, 2019 12:26
Show Gist options
  • Save zrrrzzt/f0f2e5d64f2b69b330f377423717d7a7 to your computer and use it in GitHub Desktop.
Save zrrrzzt/f0f2e5d64f2b69b330f377423717d7a7 to your computer and use it in GitHub Desktop.
Crawl a site and validate all pages
// The whole script runs inside an async IIFE so the statements below can use `await`.
(async () => {
// Callback-style crawler; invoked below as siteMap(url, callback).
const siteMap = require('sitemap-crawler')
// HTML validator; invoked below with `{ url }` and awaited.
const validate = require('html-validator')
// Root URL of the site whose pages are crawled and validated.
const url = 'http://www.npmjs.com'
/**
 * Crawl a site and hand back whatever the sitemap crawler reports for it.
 *
 * Adapts the callback-style `siteMap(url, callback)` API to a Promise.
 *
 * @param {string} url - Root URL passed to the crawler.
 * @returns {Promise<*>} Resolves with the crawler's result (the caller treats
 *   it as a list of page URLs); rejects with the crawler's error, or with
 *   anything the crawler throws synchronously.
 */
function generateSitemap (url) {
  // The executor is intentionally NOT async: an async executor would turn a
  // synchronous throw from siteMap() into a rejection of its own, discarded
  // promise, leaving the promise returned here pending forever.
  return new Promise((resolve, reject) => {
    siteMap(url, (error, result) => {
      if (error) {
        reject(error)
        return
      }
      resolve(result)
    })
  })
}
/**
 * Decide whether a validator report is free of errors.
 *
 * @param {object} result - Parsed validator report; its optional `messages`
 *   list holds entries with a `type` field.
 * @returns {boolean} `true` when `messages` is missing/falsy or contains no
 *   entry whose `type` is `'error'`, otherwise `false`.
 */
function isValidHtml (result) {
  const { messages } = result
  // A report without a message list counts as clean.
  if (!messages) {
    return true
  }
  const errorCount = messages.reduce(
    (count, entry) => (entry.type === 'error' ? count + 1 : count),
    0
  )
  return errorCount === 0
}
// Pages still waiting to be checked; validateNextPage pops entries off the end of this list.
let pagesToValidate = await generateSitemap(url)
/**
 * Work through `pagesToValidate` from the end, validating one page at a time.
 *
 * Before each check (and once more when the list is empty) the remaining page
 * count is logged. Every page's report is pretty-printed. The first page whose
 * report fails `isValidHtml` is reported on stderr, its report is printed
 * again, and the process exits with status 1. When the list runs dry,
 * 'Finished' is logged.
 *
 * @returns {Promise<void>} Settles once every page has been checked.
 */
const validateNextPage = async () => {
  const announceRemaining = () => {
    console.log(`Got ${pagesToValidate.length} pages left to check`)
  }

  announceRemaining()
  while (pagesToValidate.length > 0) {
    const pageUrl = pagesToValidate.pop()
    const report = JSON.parse(await validate({ url: pageUrl }))
    const prettyReport = JSON.stringify(report, null, 2)
    console.log(prettyReport)

    if (!isValidHtml(report)) {
      console.error(`${pageUrl} got errors`)
      console.log(prettyReport)
      process.exit(1)
      // Stop here even if process.exit() were to return.
      return
    }

    announceRemaining()
  }
  console.log('Finished')
}
// Start validating the crawled pages.
await validateNextPage()
// NOTE(review): the promise returned by this IIFE has no rejection handler, so an
// error from the crawl or a validation request surfaces as an unhandled rejection.
})()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment