Skip to content

Instantly share code, notes, and snippets.

@dbowling
Created March 4, 2022 17:43
Show Gist options
  • Save dbowling/8dcb55933eeeecf92d9ee7fa1f289d6c to your computer and use it in GitHub Desktop.
Save dbowling/8dcb55933eeeecf92d9ee7fa1f289d6c to your computer and use it in GitHub Desktop.
Cascade velocity macro for sitemap.xml
## // TODO:
## <loc>https://www.umt.edu/academics/programs/_archive/questions/communication-studies/images/default.php</loc>
## // if not yet published
## // if last modified is greater than last published
## // create system tag to include in sitemap
## // create sitemap index https://www.sitemaps.org/protocol.html | https://moz.com/sitemaps-1-sitemap.xml
## // sitemap index can auto find all sitemaps in the domain by query tool
## // if not include in navigation, should we exclude from the sitemap?
## // changefreq value from metadata (or content type/direct path match e.g. homepage, news site)
## // priority from metadata (or content type)
## // featured news articles
## #set($query = $query.searchAcrossAllSites() )
## <?xml-stylesheet type="text/xsl" href="sitemap.xsl"?>
#*
Create a base query that can be run against specific sites:
only publishable, indexable pages are returned (no folders, blocks or symlinks).
*#
#set($query = $_.query() )
#set($query = $query.publishableOnly(true).indexableOnly(true) )
#set($query = $query.includePages(true).includeFolders(false).includeBlocks(false).includeSymlinks(false) )
## The Query API only returns its default number of results (100 per the Cascade
## documentation) unless maxResults() is raised, which silently drops pages from
## larger sites. 50000 is the most URLs the sitemap protocol allows in one file;
## lower this value if rendering becomes too slow.
#set($maxResultsPerQuery = 50000)
#set($query = $query.maxResults($maxResultsPerQuery) )
#*
Collect the pages we want in the sitemap from specific conditions per site.
Each call narrows the base query to one site and one metadata set; the optional
second argument is the <priority> printed for every page of that result
(sitemapSingleUrlElement falls back to 0.5 when it is omitted).
*#
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
#sitemapUrlElementList( $query.siteName("umt.edu").byMetadataSet("UM Landing Page").execute(), 1 )
#sitemapUrlElementList( $query.siteName("umt.edu").byMetadataSet("Unit Page").execute(), 0.9 )
#sitemapUrlElementList( $query.siteName("umt.edu").byMetadataSet("Interior Page").execute(), 0.6 )
#sitemapUrlElementList( $query.siteName("Admissions").byMetadataSet("Unit Page").execute(), 0.7 )
#sitemapUrlElementList( $query.siteName("Financial Aid").byMetadataSet("Unit Page").execute(), 0.7 )
#sitemapUrlElementList( $query.siteName("Griz Card Center").byMetadataSet("Unit Page").execute() )
#sitemapUrlElementList( $query.siteName("Orientation").byMetadataSet("Unit Page").execute() )
#sitemapUrlElementList( $query.siteName("Registrar").byMetadataSet("Unit Page").execute() )
</urlset>
#*
Macros:
MACRO: sitemapUrlElementList()
DESCRIPTION: Print one <url> element for each eligible page in a list of pages, for use inside a <urlset>
PARAMS:
$pages required iterable list of page API objects
$priority optional value between 0.1 and 1.0, relative priority to bots (defaults to 0.5)
$maxDepth optional maximum number of "/"-separated segments in a page path (defaults to 10); deeper pages are skipped
*#
#macro( sitemapUrlElementList $pages, $priority, $maxDepth )
## Use a depth limit of 10 when the caller does not supply one.
#if($_PropertyTool.isEmpty($maxDepth)) #set($maxDepth = 10) #end
#foreach ( $page in $pages )
## Depth is the number of "/"-separated segments in the page path.
#set( $depth = $page.path.split("/").size() )
## A page is printed only when none of the exclusion rules match: deeper than
## $maxDepth, path contains "/imx/", path contains "/_", path starts with "_",
## or path starts with "my/".
#if ( !( $depth > $maxDepth || $page.path.contains("/imx/") || $page.path.contains("/_") || $page.path.startsWith("_") || $page.path.startsWith("my/") ) )
#sitemapSingleUrlElement($page, $priority)
#end
#end
#end
#*
MACRO: sitemapSingleUrlElement()
DESCRIPTION: Print one <url> element (loc, lastmod, priority) for a single page, for use in a <urlset>
PARAMS:
$page required page API object
$priority optional value between 0.1 and 1; an empty or out-of-range value is replaced by 0.5
*#
#macro( sitemapSingleUrlElement $page, $priority )
#if( $_PropertyTool.isEmpty($priority) || $priority < 0.1 || $priority > 1 ) #set($priority = 0.5) #end
<url>
<loc>#absoluteCanonicalPageURL($page)</loc>
<lastmod>$_DateTool.format('yyyy-MM-dd', $page.lastModified)</lastmod>
<priority>$priority</priority>
</url>
#end
#*
MACRO: absoluteCanonicalPageURL()
DESCRIPTION: Print the canonical link to a page as an absolute URL
PARAMS:
$page required page API object; its site.url and path properties are read
OUTPUT: the string "<site url>/<page path>.php" is XML-escaped, then every "//" is
collapsed to "/", every ":/" is expanded back to "://", and every space becomes "%20"
NOTE(review): the ".php" extension is hard-coded -- confirm every page in the sitemap is published with it
NOTE(review): the macro is written on a single line, presumably so no stray whitespace is printed inside <loc> -- confirm before reformatting
*#
#macro( absoluteCanonicalPageURL $page )$_EscapeTool.xml("${page.site.url}/${page.path}.php").replace("//", "/").replace(":/", "://").replace(" ", "%20")#end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment