Skip to content

Instantly share code, notes, and snippets.

@bobuss
Created July 18, 2012 13:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bobuss/3136155 to your computer and use it in GitHub Desktop.
Save bobuss/3136155 to your computer and use it in GitHub Desktop.
Sitemap generator in Python with a Mustache template
import sys
import codecs
import pystache
import os
import fnmatch
import string
import datetime
# Public base URL that is prepended to every page location in the sitemap.
webroot = 'http://www.example.com/'
# Directory that is scanned for *.html pages and that receives sitemap.xml.
path = '/var/www/'
def getUrlsFromDirectory(root=os.curdir):
    """Yield the path, relative to *root*, of every ``*.html`` file below it.

    Files located directly in *root* are yielded as a bare file name
    (``'index.html'``); files in sub-directories keep a leading separator
    (``'/sub/page.html'``), so callers that need a clean relative path
    should strip it.

    Args:
        root: Directory to scan recursively. Defaults to the current
            directory.

    Yields:
        One path string per matching file, in ``os.walk`` order.
    """
    # Resolve the root once instead of on every iteration.
    base = os.path.abspath(root)
    for dirpath, _dirs, files in os.walk(base):
        # os.walk always yields directories that start with ``base``, so
        # slicing removes exactly that prefix. A str.replace() would also
        # delete later occurrences of the same text inside the path.
        relative_dir = dirpath[len(base):]
        for filename in fnmatch.filter(files, "*.html"):
            yield os.path.join(relative_dir, filename)
# Load the Mustache template as text. codecs.open() decodes while reading,
# which works on both Python 2 and Python 3 (a Python 3 str has no .decode()),
# and the context manager closes the handle.
with codecs.open('./template.xml', 'r', 'utf8') as template_file:
    template = template_file.read()

# A single timestamp, taken once, is shared by every entry.
# NOTE(review): isoformat() on a naive datetime carries no UTC offset;
# confirm the sitemap consumers accept that form.
dtnow = datetime.datetime.now().isoformat()

# The template expects data shaped as
# {
#     'root' : The root URL,
#     'urls' : [{
#         'loc' : 'page1.html',
#         'lastmod' : '2012-07-18T14:59:22.354039',
#         'changefreq' : 'daily'
#     },{
#         'loc' : 'page2.html',
#         'lastmod' : '2012-07-18T14:59:22.354039',
#         'changefreq' : 'weekly'
#     }]
# }
urls = []
for url in getUrlsFromDirectory(path):
    # str.lstrip replaces string.lstrip(), which no longer exists in Python 3.
    loc = url.lstrip('/')
    # Map a directory index page to its directory URL. Only a whole file
    # name is matched, so 'myindex.html' is left untouched.
    if loc == 'index.html' or loc.endswith('/index.html'):
        loc = loc[:-len('index.html')]
    urls.append({
        'loc': loc,
        'lastmod': dtnow,
        'changefreq': 'daily'
    })

# Render and write the sitemap next to the scanned pages; the context
# manager closes the file even if rendering raises.
with codecs.open(os.path.join(path, 'sitemap.xml'), 'w', 'utf8') as f:
    f.write(pystache.render(template, {
        'root': webroot,
        'urls': urls
    }))
<?xml version="1.0" encoding="UTF-8"?>
{{! Sitemap template. Rendered with a context holding 'root' (the base URL) and 'urls', a list whose items each provide 'loc', 'lastmod' and 'changefreq'. One <url> element is emitted per item. }}
<urlset
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
{{#urls}}
<url>
<loc>{{ root }}{{ loc }}</loc>
<lastmod>{{ lastmod }}</lastmod>
<changefreq>{{ changefreq }}</changefreq>
</url>
{{/urls}}
</urlset>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment