Last active
August 5, 2021 15:47
-
-
Save dthunziker/323eda7180bb2722f10213a920bfefb5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function Create-XmlSitemap { | |
<# | |
.SYNOPSIS | |
Generates an XML sitemap. | |
.PARAMETER SiteName | |
The name of the site definition to generate the sitemap for. If omitted, the site definition matching the host name will be used. | |
.PARAMETER HostName | |
The host name that will appear in URLs. If omitted, the first hostname from the site definition will be used. | |
.PARAMETER HttpProtocol | |
The protocol that will appear in URLs. Defaults to "https". | |
.PARAMETER StartPath | |
The starting path for where to crawl items. If omitted, this will default to the StartPath of the site definition. | |
.PARAMETER FileName | |
The name of the file to generate. Defaults to "sitemap_[SiteName].xml". | |
.PARAMETER TargetPath | |
The destination path for where to generate the XML file. If omitted, this will default to your application's root directory. | |
.PARAMETER UrlOptions | |
The options to use when generating URLs. If omitted, the default URL options will be used. (i.e. Sitecore.Links.LinkManager.GetDefaultUrlOptions()) | |
.PARAMETER DefaultLanguage | |
The default language. If omitted, the site definition's default language will be used. The URL for this language will appear in the loc element. | |
.PARAMETER Languages | |
A comma separated list of languages to generate links for. If omitted, the item's language collection will be used. | |
.PARAMETER AppendLinks | |
Determines whether or not to include the xhtml:link elements, which includes all the specified Languages, with each URL. Defaults to true. | |
.PARAMETER LastModifiedFieldName | |
The name of the Last Modified Datetime field, used for generating <lastmod> elements. (ex: __Updated) | |
.PARAMETER ChangeFrequencyFieldName | |
The name of the Change Frequency field, used for generating <changefreq> elements. (example values: always, hourly, daily, monthly) | |
.PARAMETER PriorityFieldName | |
The name of the Priority field, used for generating <priority> elements. (example values: 1.0, 0.9, 0.5) | |
.PARAMETER Database | |
The name of the database to use while searching for items. Defaults to 'web'. | |
.PARAMETER TrailingSlash | |
Determines whether or not to append a trailing slash to URLs. Defaults to false. | |
.PARAMETER ShowProgress | |
Determines whether or not to show progress in the SPE console. Defaults to true. | |
.PARAMETER WhitelistCheckboxFieldNames | |
A hashtable of checkbox field names and the value each field should match, used to determine which items should be included in the sitemap. | |
.PARAMETER WhitelistTemplateNames | |
A comma separated list of template names. Items inheriting from these templates will be included in the sitemap. | |
.PARAMETER WhitelistFilePaths | |
An array of file paths. This file path and all its subitems will be included in the sitemap. | |
.PARAMETER BlacklistCheckboxFieldNames | |
A hashtable of checkbox field names and the value each field should match, used to determine which items should be excluded from the sitemap. | |
.PARAMETER BlacklistTemplateNames | |
A comma separated list of template names. Items inheriting from these templates will be excluded from the sitemap. | |
.PARAMETER BlacklistFilePaths | |
An array of file paths. This file path and all its subitems will be excluded from the sitemap. | |
.PARAMETER StripDefaultLanguageFromHomepage | |
Strips the default language from the homepage URL. Defaults to false. | |
.PARAMETER StripDefaultLanguageFromLocUrls | |
Strips the default language from all <loc> URLs. Defaults to false. | |
.PARAMETER StripDefaultLanguageFromAltUrls | |
Strips the default language from all URLs (both <loc> and <alternate>). Defaults to false. | |
.PARAMETER MediaLibraryPath | |
The full path to the media library item to create/update with the Sitemap. If set, this setting takes precedence over the TargetPath option. | |
.PARAMETER PublishMediaItem | |
Determines whether or not to publish the sitemap media item after updating it. Defaults to false. | |
.PARAMETER PublishingOptions | |
The Sitecore.Publishing.PublishOptions used if PublishMediaItem is set to true. By default, the sitemap media item will be published from master to web. | |
.EXAMPLE | |
Generate a sitemap for the default website | |
PS master:\> Create-XmlSitemap | |
.NOTES | |
Derek Hunziker | |
www.layerworks.com | |
@dthunziker | |
#> | |
[CmdletBinding()] | |
param ( | |
[string]$SiteName, | |
[string]$HostName = $null, | |
[ValidateNotNullOrEmpty()] | |
[string]$HttpProtocol = "https", | |
[string]$StartPath, | |
[string]$FileName, | |
[ValidateNotNullOrEmpty()] | |
[string]$TargetPath = $AppPath, | |
[ValidateNotNull()] | |
[Sitecore.Links.UrlOptions]$UrlOptions = [Sitecore.Links.LinkManager]::GetDefaultUrlOptions(), | |
[string]$DefaultLanguage, | |
[string]$Languages, | |
[ValidateNotNull()] | |
[bool]$AppendLinks = $true, | |
[string]$LastModifiedFieldName, | |
[string]$ChangeFrequencyFieldName, | |
[string]$PriorityFieldName, | |
[ValidateNotNullOrEmpty()] | |
[string]$Database = "web", | |
[ValidateNotNull()] | |
[bool]$TrailingSlash = $false, | |
[ValidateNotNull()] | |
[bool]$ShowProgress = $true, | |
[hashtable]$WhitelistCheckboxFieldNames, | |
[string]$WhitelistTemplateNames, | |
[array]$WhitelistFilePaths, | |
[hashtable]$BlacklistCheckboxFieldNames, | |
[string]$BlacklistTemplateNames, | |
[array]$BlacklistFilePaths, | |
[ValidateNotNull()] | |
[bool]$StripDefaultLanguageFromHomepage = $false, | |
[ValidateNotNull()] | |
[bool]$StripDefaultLanguageFromLocUrls = $false, | |
[ValidateNotNull()] | |
[bool]$StripDefaultLanguageFromAltUrls = $false, | |
[string]$MediaLibraryPath = $null, | |
[bool]$PublishMediaItem = $false, | |
[Sitecore.Publishing.PublishOptions]$PublishingOptions = $null | |
) | |
begin {
    # Time the whole run; the elapsed value is read again in the end block.
    $stopWatch = [System.Diagnostics.Stopwatch]::StartNew()

    # No site name given: look one up from the host name, then fall back to "website".
    if ([string]::IsNullOrEmpty($SiteName)) {
        $hostSite = [Sitecore.Sites.SiteContextFactory]::GetSiteContext($HostName, "/")
        if ($null -ne $hostSite) {
            $SiteName = $hostSite.Name
        }
        if ([string]::IsNullOrEmpty($SiteName)) {
            $SiteName = "website"
        }
    }

    # Everything below needs a site definition, so fail fast when there is none.
    $site = [Sitecore.Sites.SiteContext]::GetSite($SiteName)
    if ($null -eq $site) {
        throw [System.Exception] "$SiteName site definition not found."
    }

    # Host name default: first "|"-separated entry of the site's host name, else "localhost".
    if ([string]::IsNullOrEmpty($HostName)) {
        $HostName = if ([string]::IsNullOrEmpty($site.HostName)) {
            "localhost"
        }
        else {
            $site.HostName.Split("|")[0]
        }
    }

    # Language default: the site's language, else "en".
    if ([string]::IsNullOrEmpty($DefaultLanguage)) {
        $DefaultLanguage = $site.Language
        if ([string]::IsNullOrEmpty($DefaultLanguage)) {
            $DefaultLanguage = "en"
        }
    }

    # Start path default: the site's start path.
    if ([string]::IsNullOrEmpty($StartPath)) {
        $StartPath = $site.StartPath
    }

    # File name default: sitemap_[sitename].xml
    if ([string]::IsNullOrEmpty($FileName)) {
        $FileName = "sitemap_$($site.Name).xml"
    }

    # Protocol and host are prepended by hand later, so the link provider must not add them.
    $UrlOptions.Site = $site
    $UrlOptions.AlwaysIncludeServerUrl = $false
}
process { | |
# Run-wide counters live in script scope so the nested helpers and the end block can share them.
$script:totalProcessed = $script:totalLocs = $script:totalAlts = $script:totalSkipped = 0
# Flat list that the crawl fills with every item found under the start path.
$itemsToProcess = New-Object System.Collections.Generic.List[System.Object]
# Builds the absolute URL for an item.
#   $item - the item to generate a URL for (passed to LinkManager.GetItemUrl with $UrlOptions).
#   $alt  - $true when the URL is for an alternate link, $false when it is for a <loc> element.
#           Selects which strip option applies: StripDefaultLanguageFromAltUrls or
#           StripDefaultLanguageFromLocUrls.
# Returns the URL string. Protocol and host are prepended unless the generated URL
# already starts with "http".
function Get-Url($item, $alt = $false) {
    $defaultLang = "/$DefaultLanguage"
    $url = [Sitecore.Links.LinkManager]::GetItemUrl($item, $UrlOptions)

    # Strip default language from homepage
    if ($StripDefaultLanguageFromHomepage -eq $true -and $url -eq $defaultLang) {
        $url = ""
    }

    # Strip default language from location or alternate urls, depending on $alt
    $shouldStrip = ($alt -eq $false -and $StripDefaultLanguageFromLocUrls -eq $true) -or
                   ($alt -eq $true -and $StripDefaultLanguageFromAltUrls -eq $true)
    if ($shouldStrip) {
        # Only strip a whole leading path segment. A bare StartsWith("/en") would also
        # match "/enterprise" or "/en-us/..." and cut those URLs in half. The comparison
        # ignores case, like the -eq used for the homepage check above.
        $ignoreCase = [System.StringComparison]::OrdinalIgnoreCase
        $isLanguageRoot = $url.Equals($defaultLang, $ignoreCase)
        $isUnderLanguage = $url.StartsWith("$defaultLang/", $ignoreCase)
        if ($isLanguageRoot -or $isUnderLanguage) {
            $url = $url.Substring($defaultLang.Length)
        }
    }

    # Append trailing slash, if specified
    if ($TrailingSlash -eq $true -and !$url.EndsWith("/")) {
        $url = $url + "/"
    }

    # Relative URL: prepend protocol and host
    if (-not ($url.StartsWith("http"))) {
        $url = $HttpProtocol + "://" + $HostName + $url
    }

    return $url
}
# Tells whether an item's template is, or inherits from, the named template.
# Returns $false when the item is null, the template name is empty,
# or no template can be resolved for the item.
function Is-Derived($item, $templateName) {
    if ($null -eq $item) {
        return $false
    }
    if ([string]::IsNullOrEmpty($templateName)) {
        return $false
    }

    $template = [Sitecore.Data.Managers.TemplateManager]::GetTemplate($item)
    if ($null -eq $template) {
        return $false
    }

    # A direct name match wins; otherwise ask the template about its base templates.
    if ($template.Name -eq $templateName) {
        return $true
    }
    return $template.InheritsFrom($templateName)
}
# Tells whether the item's full path contains the given path fragment.
# The match is a wildcard "contains" test (-like "*fragment*"), so the fragment
# may appear anywhere in the path. Returns $false for a null item, an empty
# fragment, or an item without a full path.
function Is-Child($item, $filePath) {
    if ($null -eq $item) {
        return $false
    }
    if ([string]::IsNullOrEmpty($filePath)) {
        return $false
    }

    $fullPath = $item.Paths.FullPath
    if ($null -eq $fullPath) {
        return $false
    }

    $pattern = "*" + $filePath + "*"
    return ($fullPath -like $pattern)
}
# Filters the crawled items down to the ones that belong in the sitemap.
#   $items - the collection of items to examine.
# An item is skipped (and $script:totalSkipped incremented) when:
#   - it has no layout ($item.Visualization.Layout is null);
#   - a whitelist is supplied and the item matches NONE of its entries
#     (checkbox fields, template names, file paths are each checked separately);
#   - it matches ANY entry of a blacklist (checkbox fields, template names, file paths).
# Returns the qualifying items.
function Get-Qualified-Items($items) {
    $qualifiedItems = New-Object System.Collections.Generic.List[System.Object]
    $total = $items.Count
    $counter = 0

    # The filter lists do not change per item, so parse them once. Blank entries are dropped.
    $whitelistTemplates = @("$WhitelistTemplateNames".Split(",") | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne "" })
    $blacklistTemplates = @("$BlacklistTemplateNames".Split(",") | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne "" })
    $whitelistPaths = @($WhitelistFilePaths | ForEach-Object { "$_".Trim() } | Where-Object { $_ -ne "" })
    $blacklistPaths = @($BlacklistFilePaths | ForEach-Object { "$_".Trim() } | Where-Object { $_ -ne "" })
    $hasWhitelistFields = ($null -ne $WhitelistCheckboxFieldNames) -and ($WhitelistCheckboxFieldNames.Count -ne 0)
    $hasBlacklistFields = ($null -ne $BlacklistCheckboxFieldNames) -and ($BlacklistCheckboxFieldNames.Count -ne 0)

    foreach ($item in $items) {
        # Count every examined item, not only the qualifying ones, so the bar reaches 100%.
        $counter++
        if ($ShowProgress -and $total -gt 0) {
            Write-Progress `
                -Activity "Qualifying Items" `
                -Status "Determining what items to include..." `
                -PercentComplete (($counter / $total) * 100);
        }

        # Skip items with no presentation
        if ($null -eq $item.Visualization.Layout) {
            $script:totalSkipped++
            continue
        }

        # Skip items that aren't whitelisted: at least one checkbox field must match
        if ($hasWhitelistFields) {
            $matched = $false
            foreach ($cb in $WhitelistCheckboxFieldNames.GetEnumerator()) {
                if ($item[$cb.Name] -eq $cb.Value) {
                    $matched = $true
                    break
                }
            }
            if (-not $matched) {
                $script:totalSkipped++
                continue
            }
        }

        # Template whitelist: matching ANY listed template is enough. Requiring every
        # template to match would reject nearly everything once two are listed.
        if ($whitelistTemplates.Count -gt 0) {
            $matched = $false
            foreach ($templateName in $whitelistTemplates) {
                if ((Is-Derived $item $templateName) -eq $true) {
                    $matched = $true
                    break
                }
            }
            if (-not $matched) {
                $script:totalSkipped++
                continue
            }
        }

        # Path whitelist: same any-of rule, since an item cannot sit under several unrelated paths.
        if ($whitelistPaths.Count -gt 0) {
            $matched = $false
            foreach ($filePath in $whitelistPaths) {
                if ((Is-Child $item $filePath) -eq $true) {
                    $matched = $true
                    break
                }
            }
            if (-not $matched) {
                $script:totalSkipped++
                continue
            }
        }

        # Skip blacklisted items: any single match excludes the item
        $blocked = $false
        if ($hasBlacklistFields) {
            foreach ($cb in $BlacklistCheckboxFieldNames.GetEnumerator()) {
                if ($item[$cb.Name] -eq $cb.Value) {
                    $blocked = $true
                    break
                }
            }
        }
        if (-not $blocked) {
            foreach ($templateName in $blacklistTemplates) {
                if ((Is-Derived $item $templateName) -eq $true) {
                    $blocked = $true
                    break
                }
            }
        }
        if (-not $blocked) {
            foreach ($filePath in $blacklistPaths) {
                if ((Is-Child $item $filePath) -eq $true) {
                    $blocked = $true
                    break
                }
            }
        }
        if ($blocked) {
            $script:totalSkipped++
            continue
        }

        $qualifiedItems.Add($item)
    }

    return $qualifiedItems
}
# Adds the <loc> element for an item to its <url> node and, when $AppendLinks is set,
# one xhtml:link alternate element per language.
#   $item - the item being written.
#   $url  - the <url> element to append to.
#   $doc  - the XmlDocument used to create elements.
# Increments $script:totalAlts for every alternate link written.
function Append-Loc($item, $url, $doc) {
    $loc = $doc.CreateElement("loc")

    # Get-Url applies StripDefaultLanguageFromLocUrls only when $alt is $false, and
    # StripDefaultLanguageFromAltUrls only when $alt is $true. The Alt option is
    # documented to cover <loc> as well, so request alt handling when it is set;
    # otherwise request loc handling so StripDefaultLanguageFromLocUrls is honored.
    $treatLocAsAlt = ($StripDefaultLanguageFromAltUrls -eq $true)

    $langSwitcher = New-Object Sitecore.Globalization.LanguageSwitcher $DefaultLanguage
    try {
        $loc.InnerText = Get-Url $item $treatLocAsAlt
    }
    finally {
        # Always leave the switched language, even when URL generation fails.
        $langSwitcher.Dispose()
    }
    $url.AppendChild($loc) | Out-Null

    if (-not $AppendLinks) {
        return
    }

    # Use the item's languages if no explicit ones were specified
    $languagesToProcess = $Languages.Split(",", [System.StringSplitOptions]::RemoveEmptyEntries)
    if ($languagesToProcess.Count -eq 0) {
        $languagesToProcess = @($item.Languages | Select-Object -ExpandProperty Name)
    }

    foreach ($entry in $languagesToProcess) {
        $language = $entry.Trim()
        if ([string]::IsNullOrEmpty($language)) {
            # A whitespace-only entry (e.g. "en, ,de") is not a language.
            continue
        }

        # Ensure there's a version for this language, or that we're not enforcing version presence
        if ([Sitecore.Data.Managers.ItemManager]::GetVersions($item, $language).Count -gt 0 -or $item[[Sitecore.FieldIDs]::EnforceVersionPresence] -ne "1") {
            $link = $doc.CreateElement("xhtml", "link", "http://www.w3.org/1999/xhtml")
            $link.SetAttribute("rel", "alternate")
            $link.SetAttribute("hreflang", $language)

            $langSwitcher = New-Object Sitecore.Globalization.LanguageSwitcher $language
            try {
                $link.SetAttribute("href", (Get-Url $item $true))
            }
            finally {
                $langSwitcher.Dispose()
            }

            $url.AppendChild($link) | Out-Null
            $script:totalAlts++
        }
    }
}
# Adds a <lastmod> element to the <url> node, taken from the item's
# $LastModifiedFieldName field. Nothing is added when no field name is configured,
# when the field is empty, or when the value does not parse to a real date.
#   $item - the item being written.
#   $url  - the <url> element to append to.
#   $doc  - the XmlDocument used to create elements.
function Append-LastMod($item, $url, $doc) {
    if ([string]::IsNullOrEmpty($LastModifiedFieldName)) {
        return
    }

    $rawValue = $item[$LastModifiedFieldName]
    if ([string]::IsNullOrEmpty($rawValue)) {
        # No value stored: omit the optional element rather than invent a date.
        return
    }

    $date = ([Sitecore.DateUtil]::IsoDateToDateTime($rawValue))
    if ($date -eq [System.DateTime]::MinValue) {
        # DateTime.MinValue is not a meaningful modification date; leave <lastmod> out.
        return
    }

    $mod = $doc.CreateElement("lastmod")
    $mod.InnerText = $date.ToString("s") + "Z"
    $url.AppendChild($mod) | Out-Null
}
# Adds a <changefreq> element to the <url> node, taken from the item's
# $ChangeFrequencyFieldName field. Nothing is added when no field name is configured
# or when the field is empty, because an empty <changefreq> is not a valid value.
#   $item - the item being written.
#   $url  - the <url> element to append to.
#   $doc  - the XmlDocument used to create elements.
function Append-ChangeFreq($item, $url, $doc) {
    if ([string]::IsNullOrEmpty($ChangeFrequencyFieldName)) {
        return
    }

    $value = $item[$ChangeFrequencyFieldName]
    if ([string]::IsNullOrEmpty($value)) {
        return
    }

    $freq = $doc.CreateElement("changefreq")
    $freq.InnerText = $value
    $url.AppendChild($freq) | Out-Null
}
# Adds a <priority> element to the <url> node, taken from the item's
# $PriorityFieldName field. Nothing is added when no field name is configured
# or when the field is empty, because an empty <priority> is not a valid value.
#   $item - the item being written.
#   $url  - the <url> element to append to.
#   $doc  - the XmlDocument used to create elements.
function Append-Priority($item, $url, $doc) {
    if ([string]::IsNullOrEmpty($PriorityFieldName)) {
        return
    }

    $value = $item[$PriorityFieldName]
    if ([string]::IsNullOrEmpty($value)) {
        return
    }

    $pri = $doc.CreateElement("priority")
    $pri.InnerText = $value
    $url.AppendChild($pri) | Out-Null
}
# Collects the given item and all of its descendants into $itemsToProcess,
# parents before children and siblings in their original order.
function Get-ItemsRecursive($item) {
    $pending = New-Object System.Collections.Generic.Stack[System.Object]
    $pending.Push($item)

    while ($pending.Count -gt 0) {
        $current = $pending.Pop()
        $itemsToProcess.Add($current)

        # Gather the children, then push them in reverse so the first child is popped first.
        $children = New-Object System.Collections.Generic.List[System.Object]
        foreach ($child in $current.Children) {
            $children.Add($child)
        }
        for ($index = $children.Count - 1; $index -ge 0; $index--) {
            $pending.Push($children[$index])
        }
    }
}
# Main run: crawl the tree, filter it, write one <url> node per qualifying item,
# then save the document to a media library item or to a file on disk.
$error.clear()

# XML Document Setup
[xml]$doc = New-Object System.Xml.XmlDocument
$docNode = $doc.CreateXmlDeclaration("1.0", "UTF-8", $null)
$doc.AppendChild($docNode) | Out-Null
$urlSet = $doc.CreateElement("urlset")
$urlSet.SetAttribute("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9")
$urlSet.SetAttribute("xmlns:xhtml", "http://www.w3.org/1999/xhtml")

if ($ShowProgress) {
    Write-Progress `
        -Activity "Initalizing" `
        -Status "Loading items into memory...";
}

# Crawl everything under the start path of the chosen database.
cd ($Database + ":" + $StartPath)
Get-ItemsRecursive (Get-Item .)
$script:totalProcessed = $itemsToProcess.Count
$itemsToProcess = Get-Qualified-Items $itemsToProcess

# @() makes the count reliable when the filter yields a single item or nothing.
$itemTotal = @($itemsToProcess).Count
$itemCounter = 0
foreach ($item in $itemsToProcess) {
    # The counter must actually advance, otherwise the progress bar stays at 0%.
    $itemCounter++
    if ($ShowProgress -and $itemTotal -gt 0) {
        Write-Progress `
            -Activity "Generating Sitemap" `
            -Status "Processing pages..." `
            -PercentComplete (($itemCounter / $itemTotal) * 100);
    }

    $url = $doc.CreateElement("url")
    Append-Loc $item $url $doc
    Append-LastMod $item $url $doc
    Append-ChangeFreq $item $url $doc
    Append-Priority $item $url $doc
    $urlSet.AppendChild($url) | Out-Null
    $script:totalLocs++
}
$doc.AppendChild($urlSet) | Out-Null

[string]$target = ""

# Check if the MediaLibraryPath variable is set, if so write the sitemap to that item.
if (![string]::IsNullOrEmpty($MediaLibraryPath)) {
    $target = $MediaLibraryPath
    $stream = New-Object System.IO.MemoryStream
    try {
        $doc.Save($stream)
        # Save leaves the position at the end of the stream. Rewind so the content is
        # read from the start whether or not the media creator seeks on its own.
        $stream.Position = 0
        [Sitecore.Resources.Media.MediaManager]::Creator.AttachStreamToMediaItem($stream, $MediaLibraryPath, $FileName, [Sitecore.Resources.Media.MediaCreatorOptions]::Empty) | Out-Null
    }
    finally {
        $stream.Dispose()
    }

    # Check if the media item should be published.
    if ($PublishMediaItem) {
        if ($PublishingOptions -eq $null) {
            # Default: publish just the sitemap item from master to web.
            $masterDB = [Sitecore.Configuration.Factory]::GetDatabase("master")
            $webDB = [Sitecore.Configuration.Factory]::GetDatabase("web")
            $PublishingOptions = New-Object Sitecore.Publishing.PublishOptions($masterDB, $webDB, [Sitecore.Publishing.PublishMode]::SingleItem, [Sitecore.Context]::Language, [System.DateTime]::Now)
            $PublishingOptions.RootItem = Get-Item "master:$MediaLibraryPath"
            $PublishingOptions.Deep = $false
        }
        $publisher = New-Object Sitecore.Publishing.Publisher $PublishingOptions
        $publisher.Publish()
    }
}
# Output to file.
else {
    # Insert a directory separator only when neither side supplies one, so
    # "C:\out" + "sitemap.xml" no longer becomes "C:\outsitemap.xml" while paths
    # that already end in a slash keep producing the same file name as before.
    $separator = ""
    $targetEndsWithSlash = $TargetPath.EndsWith("\") -or $TargetPath.EndsWith("/")
    $fileStartsWithSlash = $FileName.StartsWith("\") -or $FileName.StartsWith("/")
    if (-not ($targetEndsWithSlash -or $fileStartsWithSlash)) {
        $separator = [System.IO.Path]::DirectorySeparatorChar
    }
    $target = "$TargetPath$separator$FileName"
    $doc.Save($target)
}

$stopWatch.Stop()
} | |
end {
    # Summarise the run: item and URL counts, elapsed time, the bound parameters
    # and where the sitemap was written.
    $urlCount = $script:totalLocs + $script:totalAlts
    $urlSummary = $urlCount.ToString() + " ($script:totalLocs urls, $script:totalAlts alternate urls)"

    $report = @{
        TotalItemsProcessed = $script:totalProcessed
        TotalUrls = $urlSummary
        SkippedUrls = $script:totalSkipped
        ElapsedSeconds = $stopWatch.Elapsed.TotalSeconds
        Parameters = $PsBoundParameters
        WrittenTo = $target
    }

    # The summary is only shown for a run that recorded no errors.
    if ($error.Count -eq 0) {
        $rendered = $report | Format-Table | Out-String
        Write-Host $rendered
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Excellent script!
I used it to feed a JMeter script. One minor issue (due to an MS bug) is that it generates the sitemap.xml file as UTF-8 with BOM. The BOM was making my script blow up. I used this SO answer to solve the problem, essentially creating the XML file without the XML declaration (comment out lines 438-439), since the declaration is actually optional and is what triggers the MS bug.
Cheers!