A PowerShell script that downloads an XML Sitemap and parses the content to retrieve all the information. If the file contains Sitemap Index entries those links will be followed as well.
<#
.SYNOPSIS
Returns all URLs and optional metadata from an XML Sitemap
.DESCRIPTION
Downloads an XML Sitemap and parses the content to retrieve all the information. If the file contains Sitemap Index entries, those links will be followed as well.
.NOTES
Author: Michaël Hompus
License: This work is licensed under a Creative Commons Attribution-ShareAlike 2.5 Generic License.
.PARAMETER Url
The URL for an XML Sitemap file
.PARAMETER NoFollow
Indicates any Sitemap Index entries won't be followed
.LINK
Invoke-RestMethod
https://gist.github.com/eNeRGy164/a644417b737eb5d3af3c80d4ceb527e1
.EXAMPLE
ConvertFrom-SiteMap.ps1 -Url https://blog.hompus.nl/sitemap.xml
.EXAMPLE
ConvertFrom-SiteMap.ps1 -Url https://blog.hompus.nl/sitemap.xml -NoFollow
#>
[CmdletBinding()]
param(
    # Required: address of the XML Sitemap file to download.
    [Parameter(Mandatory, HelpMessage = "The URL for a XML Sitemap file")]
    [String] $Url,

    # Optional: when present, Sitemap Index entries are not followed.
    [Parameter(HelpMessage = "Indicates any Sitemap Index entries won't be followed")]
    [Switch] $NoFollow
)
# ParseSitemap
#   Downloads the sitemap at $url and writes every <url> entry of its <urlset>
#   to the pipeline. When the document is a <sitemapindex> and the caller's
#   $NoFollow variable is not set, each referenced sitemap is fetched recursively.
#
# Parameters:
#   $url     - address of the sitemap (or sitemap index) to download.
#   $visited - optional set of addresses already fetched; created on the first
#              call and handed down through the recursion so that an index
#              which references itself (directly or indirectly) cannot loop forever.
#
# Reads:
#   $NoFollow - resolved from the calling scope (the script's -NoFollow switch).
#
# Output:
#   The <url> elements of every urlset that was reached; nothing for empty sets.
Function ParseSitemap($url, $visited) {
    if ($null -eq $visited) {
        $visited = New-Object 'System.Collections.Generic.HashSet[string]'
    }

    # HashSet.Add returns $false when the value is already present.
    $target = "$url".Trim()
    if (-not $visited.Add($target)) {
        Write-Verbose "Skipping $target because it was already fetched"
        return
    }

    Write-Verbose "Fetching file from $target"
    $sitemap = Invoke-RestMethod $target -UseBasicParsing

    # Invoke-RestMethod only returns an XML document when the server labels the
    # response as XML; otherwise the raw text comes back and must be parsed here.
    if ($sitemap -is [string]) {
        try {
            # A leading byte-order mark would make the XML cast fail.
            $sitemap = [xml]($sitemap.TrimStart([char]0xFEFF))
        }
        catch {
            Write-Error "The response from $target is not valid XML: $($_.Exception.Message)"
            return
        }
    }

    Write-Verbose "Parsing sitemap"

    # $null goes on the left so the comparison is always a scalar null check.
    if ($null -ne $sitemap.sitemapindex -and !$NoFollow) {
        Write-Verbose "Following sitemapindex entries"

        foreach ($entry in @($sitemap.sitemapindex.sitemap.loc)) {
            # A <loc> holding CDATA or attributes is exposed as an XML node, not a string.
            if ($entry -is [System.Xml.XmlNode]) {
                $entry = $entry.InnerText
            }

            # Skip missing/blank locations (e.g. an empty <sitemapindex/>).
            $childUrl = "$entry".Trim()
            if ($childUrl) {
                ParseSitemap -url $childUrl -visited $visited
            }
        }
    }

    if ($null -ne $sitemap.urlset) {
        Write-Verbose "Parsing urlset"

        # An empty <urlset/> has no url children; do not emit a $null item for it.
        $sitemap.urlset.url | Where-Object { $null -ne $_ }
    }
}
# Entry point: parse the sitemap supplied through the script's -Url parameter.
ParseSitemap -url $Url
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment