Skip to content

Instantly share code, notes, and snippets.

@kensykora
Created May 28, 2014 14:38
Show Gist options
  • Save kensykora/a19213dfd023c62bbfd7 to your computer and use it in GitHub Desktop.
Save kensykora/a19213dfd023c62bbfd7 to your computer and use it in GitHub Desktop.
W3C HTML Sitemap-based Site Validator
# Script parameters:
#   SitemapUrl - required; address of the sitemap XML document to download.
#   Max        - optional integer (default 0); caps how many sitemap URLs get validated.
param (
    [Parameter(Mandatory = $true)]
    [string] $SitemapUrl,

    [int] $Max = 0
)
# Checks a single page: confirms it is served directly (no redirect or HTTP
# error status) and then reports how many error / warning messages the W3C
# markup validator returns for it.
#   $url - absolute URL, or a reference that is resolved against the
#          script-level $SitemapUrl.
# Output: a warning for pages that cannot be fetched, that answer with a
# status >= 300, or that have validator messages; a host message for pages
# with none. Nothing is returned to the pipeline.
function Validate($url) {
    # Sitemap nodes without a usable <loc> arrive here as $null/empty; skip
    # them instead of failing on a method call against $null.
    $url = [string]$url
    if ([string]::IsNullOrWhiteSpace($url)) {
        return
    }

    if ($url -notmatch '^https?://') {
        # Resolve against the sitemap address so that a non-default port is
        # kept and references without a leading slash still produce a valid URL.
        [System.Uri]$baseUri = $SitemapUrl
        $url = (New-Object System.Uri($baseUri, $url)).AbsoluteUri
    }

    # Step 1: fetch the page itself without following redirects.
    try {
        $test = Invoke-WebRequest -Uri $url -MaximumRedirection 0 -ErrorAction Ignore
    } catch {
        $failure = $_
        $response = $null
        if ($failure.Exception.PSObject.Properties['Response']) {
            $response = $failure.Exception.Response
        }
        if ($response -and $response.PSObject.Properties['StatusCode']) {
            # HTTP-level failures (404, 500, ...) carry the response; report
            # the status code in the same format as the redirect warning below.
            Write-Warning ("{0}: HTTP {1}" -f $url, [int]$response.StatusCode)
        } else {
            # DNS / connection / TLS problems have no response to inspect.
            Write-Warning ("{0} {1}" -f $url, $failure.Exception.Message)
        }
        return
    }

    if ($test.StatusCode -ge 300) {
        Write-Warning ("{0}: HTTP {1}" -f $url, $test.StatusCode)
        return
    }

    # Step 2: ask the validator about the page. EscapeDataString is used
    # because it needs no extra assembly (System.Web is not loaded by this script).
    try {
        $validatorUri = "http://validator.w3.org/check?uri={0}&output=json" -f [System.Uri]::EscapeDataString($url)
        $result = Invoke-RestMethod -Uri $validatorUri
    } catch {
        Write-Warning ("{0} {1}" -f $url, $_.Exception.Message)
        return
    }

    # @() guarantees an array, so .Count is reliable for zero or one match too.
    $errors = @($result.messages | Where-Object { $_.subtype -eq "error" })
    $warnings = @($result.messages | Where-Object { $_.subtype -eq "warning" })

    # Singular only for exactly one message ("1 Error", "0 Errors", "2 Errors").
    $errorSuffix = if ($errors.Count -eq 1) { "" } else { "s" }
    $warningSuffix = if ($warnings.Count -eq 1) { "" } else { "s" }
    $message = "{0}: {1} Error{2} {3} Warning{4}" -f $url, $errors.Count, $errorSuffix, $warnings.Count, $warningSuffix

    if ($warnings.Count -gt 0 -or $errors.Count -gt 0) {
        Write-Warning $message
    } else {
        Write-Host $message
    }
}
# Download the sitemap, collect its <loc> values and validate them in order.
$client = New-Object System.Net.WebClient
try {
    [xml]$sitemap = $client.DownloadString($SitemapUrl)
} finally {
    # WebClient is disposable; release it even when the download fails.
    $client.Dispose()
}

$urls = @()
if ($sitemap.urlset -is [System.Xml.XmlElement]) {
    # @() keeps a single-entry sitemap as a one-element array (indexing a bare
    # string would yield its first character); Where-Object drops child nodes
    # that have no <loc> value, such as comments.
    $urls = @($sitemap.urlset.get_ChildNodes() | ForEach-Object { $_.loc } | Where-Object { $_ })
} else {
    Write-Warning ("{0}: no <urlset> entries found" -f $SitemapUrl)
}

# $Max caps the number of URLs; zero (the default) or a negative value means
# "no limit", so running the script without -Max validates the whole sitemap.
$limit = $urls.Count
if ($Max -gt 0 -and $Max -lt $limit) {
    $limit = $Max
}

for ($i = 0; $i -lt $limit; $i++) {
    Validate $urls[$i]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment