# Source: GitHub gist Atorich/9beabffdd6aa8cdbab7d (created October 6, 2015).
# coding=utf-8
import datetime
from flask import Blueprint, render_template, make_response, url_for, abort, \
current_app as app
from werkzeug.local import LocalProxy
from werkzeug.routing import Rule
# Blueprint on which the sitemap index and sitemap group views are registered.
blueprint = Blueprint('sitemap', __name__, template_folder='templates')
# Named values for the ``changefreq`` attribute of a sitemap item.
CHANGEFREQ_ALWAYS = 'always'
CHANGEFREQ_HOURLY = 'hourly'
CHANGEFREQ_DAILY = 'daily'
CHANGEFREQ_WEEKLY = 'weekly'
CHANGEFREQ_MONTHLY = 'monthly'
CHANGEFREQ_YEARLY = 'yearly'
CHANGEFREQ_NEVER = 'never'
# NOTE: evaluated once, when the module is imported. Every ``lastmod`` built
# from it therefore reports the import time, not the time of the request.
now = datetime.datetime.utcnow()
# Fallback settings: Sitemap.init_app copies each key from ``app.config`` and
# uses the value below when the application does not define it.
# NOTE(review): SITEMAP_MAX_PER_GROUP is copied into the config but nothing in
# this file reads it back; SitemapGroup uses its own MAX_PER_GROUP constant.
default_config = {
'SITEMAP_MAX_PER_GROUP': 500,
'SITEMAP_INDEX_PAGE_ENDPOINT': 'index',
'SITEMAP_INDEX_PAGE_PRIORITY': 1,
}
class SitemapGroupOverflowError(Exception):
    """Raised when an item is appended to a sitemap group that is already full."""
class SitemapIndex(object):
    """
    Ordered collection of sitemap groups that make up a sitemap index.

    Items are added through :meth:`append`, which fills the most recent
    group and opens a new one whenever there is no group yet or the
    current one is full.
    """

    __slots__ = ('_groups',)

    def __init__(self, groups=None):
        """
        :param groups: optional list of already built groups. ``None`` or an
            empty list results in a fresh, private list.
        """
        self._groups = groups or []

    @property
    def last_group(self):
        """
        Return the most recently added group.

        :raises IndexError: when the index holds no groups yet.
        """
        return self._groups[-1]

    def _create_next_group(self):
        """
        Build the group that would occupy the next free position.

        The group is only constructed here, not registered; its ``loc``
        points at the ``sitemap.sitemap_group`` view for index ``len(self)``.
        """
        return SitemapGroup(
            loc=url_for(
                'sitemap.sitemap_group',
                group=len(self),
                _external=True,
            ),
            lastmod=now,
        )

    def append(self, o):
        """
        Add ``o`` to the index and return the group that received it.

        The last group is tried first. If the index is empty or that group
        is full, a new group is created, registered and used instead.

        :raises SitemapGroupOverflowError: if even a freshly created group
            refuses the item (previously this recursed without bound).
        """
        try:
            group = self.last_group
        except IndexError:
            # Empty index: nothing to try, go straight to a new group.
            group = None

        if group is not None:
            try:
                group.append(o)
            except SitemapGroupOverflowError:
                # Current group is full; roll over to a new one below.
                pass
            else:
                return group

        group = self._create_next_group()
        self._groups.append(group)
        # Deliberately unguarded: a brand-new group that cannot take the item
        # is a configuration error and must surface to the caller.
        group.append(o)
        return group

    def __len__(self):
        """Return the number of groups in the index."""
        return len(self._groups)

    def __iter__(self):
        """Iterate over the groups in creation order."""
        return iter(self._groups)

    def __getitem__(self, item):
        """
        Return the group at position ``item``, or ``None`` when that
        position is out of range (callers use ``None`` to answer 404).
        """
        try:
            return self._groups[item]
        except IndexError:
            return None
class SitemapGroup(object):
    """
    A single sitemap file: a bounded list of items plus the group's own
    location and last-modification stamp.

    See http://www.sitemaps.org/protocol.html
    """

    # Upper bound on the number of items one group accepts. This class-level
    # constant is what append() enforces.
    MAX_PER_GROUP = 50000
    __slots__ = ('loc', 'lastmod', '_items')

    def __init__(self, loc, lastmod, items=None):
        """
        :param loc: URL under which this group is served.
        :param lastmod: ``datetime.datetime`` of the last modification.
            Naive values are taken to be UTC; aware values are converted
            to UTC before formatting.
        :param items: optional initial list of items. ``None`` or an empty
            list results in a fresh, private list.
        :raises TypeError: if ``lastmod`` is not a ``datetime.datetime``.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not isinstance(lastmod, datetime.datetime):
            raise TypeError(
                "lastmod should be an instance of datetime.datetime"
            )
        offset = lastmod.utcoffset()
        if offset is not None:
            # Normalise aware datetimes so that the "Z" suffix is truthful.
            lastmod = (lastmod - offset).replace(tzinfo=None)
        self.loc = loc
        # W3C datetime needs the "T" separator, which str(datetime) lacks.
        self.lastmod = "%sZ" % lastmod.isoformat()
        self._items = items or []

    def append(self, o):
        """
        Add ``o`` to the group.

        :raises SitemapGroupOverflowError: when the group already holds
            ``MAX_PER_GROUP`` items.
        """
        if len(self) >= self.MAX_PER_GROUP:
            raise SitemapGroupOverflowError
        self._items.append(o)

    def __len__(self):
        """Return the number of items in the group."""
        return len(self._items)

    def __iter__(self):
        """Iterate over the items in insertion order."""
        return iter(self._items)
class SitemapItem(object):
    """
    A single URL entry of a sitemap.

    See http://www.sitemaps.org/protocol.html
    """

    __slots__ = ('loc', 'lastmod', 'priority', 'changefreq')

    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # string check below also works on Python 3.
    try:
        _string_types = basestring  # noqa: F821
    except NameError:
        _string_types = str

    def __init__(self, loc, lastmod, priority=None, changefreq=None):
        """
        :param loc: either a ready-made URL string or a ``werkzeug`` ``Rule``
            whose endpoint is resolved to an external URL with ``url_for``.
        :param lastmod: ``datetime.datetime`` of the last modification.
            Naive values are taken to be UTC; aware values are converted
            to UTC before formatting.
        :param priority: optional priority value, stored as given.
        :param changefreq: optional change frequency, stored as given.
        :raises TypeError: if ``lastmod`` is not a ``datetime.datetime``.
        :raises ValueError: if ``loc`` is neither a string nor a ``Rule``.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not isinstance(lastmod, datetime.datetime):
            raise TypeError(
                "lastmod should be an instance of datetime.datetime"
            )

        if isinstance(loc, self._string_types):
            self.loc = loc
        elif isinstance(loc, Rule):
            self.loc = url_for(loc.endpoint, _external=True)
        else:
            raise ValueError(
                "Loc should be an instance of string or werkzeug.Rule"
            )

        offset = lastmod.utcoffset()
        if offset is not None:
            # Normalise aware datetimes so that the "Z" suffix is truthful.
            lastmod = (lastmod - offset).replace(tzinfo=None)
        # W3C datetime needs the "T" separator, which str(datetime) lacks.
        self.lastmod = "%sZ" % lastmod.isoformat()
        self.priority = priority
        self.changefreq = changefreq
class SitemapSource(object):
    """
    Base class for a kind of content listed in the sitemap (Post, User, ...).

    A subclass supplies the objects to list and knows how to derive the URL
    and last-modification time of each of them. ``priority`` and
    ``changefreq`` are applied to every item produced from the source.
    """

    priority = None
    changefreq = None

    def items(self):
        """Supply the objects to be listed; must be provided by subclasses."""
        raise NotImplementedError()

    def lastmod(self, o):
        """Give the last-modification datetime of ``o``; subclass hook."""
        raise NotImplementedError()

    def url(self, o):
        """Give the sitemap location of ``o``; subclass hook."""
        raise NotImplementedError()
class Sitemap(object):
    """
    Flask extension object that collects sitemap sources and builds the
    sitemap index from them.
    """

    # Class used by build_index() to create the index container.
    index_cls = SitemapIndex
    # Class-level defaults; every instance works on its own copies (see
    # __init__) so that separate Sitemap objects do not share state.
    config = {}
    sources = {}

    def __init__(self, app=None):
        """
        :param app: optional Flask application. When given, the extension
            is initialised for it immediately; otherwise call
            :meth:`init_app` later.
        """
        # Copy instead of mutating the class-level dicts, which would leak
        # settings and sources between instances.
        self.config = dict(self.config)
        self.sources = dict(self.sources)
        self.app = app
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """
        Read the sitemap settings from ``app.config`` (falling back to
        ``default_config``), register this object under
        ``app.extensions['sitemap']`` and attach the sitemap blueprint.
        """
        for key, default in default_config.items():
            self.config[key] = app.config.get(key, default)
        app.extensions['sitemap'] = self
        self.register_blueprint(app)

    def register_blueprint(self, app):
        """Attach the sitemap blueprint to ``app``."""
        app.register_blueprint(blueprint)

    def register_source(self, source):
        """
        Register a source of sitemap items.

        Sources are keyed by their concrete type, so registering a second
        instance of the same class replaces the first.

        :raises TypeError: if ``source`` is not a ``SitemapSource``.
        """
        if not isinstance(source, SitemapSource):
            raise TypeError("source should be an instance of SitemapSource")
        self.sources[type(source)] = source

    @staticmethod
    def build_item_from_source(source, o):
        """
        Turn object ``o`` into a ``SitemapItem`` using ``source`` for its
        URL, last-modification time, priority and change frequency.

        :raises TypeError: if ``source`` is not a ``SitemapSource``.
        """
        if not isinstance(source, SitemapSource):
            raise TypeError("source should be an instance of SitemapSource")
        return SitemapItem(
            loc=source.url(o),
            lastmod=source.lastmod(o),
            priority=source.priority,
            changefreq=source.changefreq,
        )

    def build_index(self):
        """
        Build and return a complete index: the configured main page first,
        followed by every item of every registered source.
        """
        index = self.index_cls()

        # Main page.
        index.append(SitemapItem(
            loc=url_for(
                self.config['SITEMAP_INDEX_PAGE_ENDPOINT'], _external=True
            ),
            priority=self.config['SITEMAP_INDEX_PAGE_PRIORITY'],
            lastmod=now,
            changefreq=CHANGEFREQ_DAILY,
        ))

        for source in self.sources.values():
            entries = source.items
            # SitemapSource declares ``items`` as a method, so call it;
            # sources exposing it as a property or attribute keep working.
            if callable(entries):
                entries = entries()
            for entry in entries:
                index.append(self.build_item_from_source(source, entry))
        return index
def _get_cache():
    """
    Return a lazy proxy to the cache object used by the sitemap views.

    On each access the proxy tries ``from app import cache``; if that
    import fails, a no-op stand-in is used so the views still work
    without caching.
    """
    class DummyCache(object):
        """No-op replacement offering the ``cached`` decorator factory."""

        def cached(self, *args, **kwargs):
            """
            Accept and ignore any caching options and return a decorator
            that hands the function back unchanged.

            Returning ``None`` here (as before) made ``@cache.cached(...)``
            fail with ``TypeError`` as soon as the fallback was in use.
            """
            def passthrough(func):
                return func
            return passthrough

        @property
        def cache(self):
            return self

    fallback = DummyCache()

    def proxy_func():
        try:
            from app import cache
        except ImportError:
            return fallback
        return cache

    return LocalProxy(proxy_func)
def sitemap_response(response_str):
    """Wrap rendered sitemap markup in a response served as application/xml."""
    xml_response = make_response(response_str)
    headers = xml_response.headers
    headers["Content-Type"] = "application/xml"
    return xml_response
# Lazy handle on the cache object; the proxy returned by _get_cache() resolves
# it on use rather than at import time.
_cache = _get_cache()
# Lazy handle on the Sitemap instance registered in the current application's
# extensions (needs an application context when it is dereferenced).
_sitemap = LocalProxy(
lambda: app.extensions['sitemap']
)
@blueprint.route('/sitemap.xml', methods=['GET'])
def sitemap_index():
    """
    Serve the sitemap index document.

    The rendering function is wrapped with the cache decorator inside the
    view, so the cache proxy is only dereferenced while handling a request.
    """
    def cached(*args, **kwargs):
        built_index = _sitemap.build_index()
        markup = render_template('sitemap/index.xml', index=built_index)
        return sitemap_response(markup)

    cached = _cache.cached(timeout=3600)(cached)
    return cached()
@blueprint.route('/sitemap.<int:group>.xml')
def sitemap_group(group):
    """
    Serve the sitemap document of the group at position ``group``.

    Answers 404 when the index yields no usable group for that position.
    The rendering function is wrapped with the cache decorator inside the
    view, so the cache proxy is only dereferenced while handling a request.
    """
    def cached(*args, **kwargs):
        selected = _sitemap.build_index()[group]
        if not selected:
            abort(404)
        markup = render_template('sitemap/group.xml', group=selected)
        return sitemap_response(markup)

    cached = _cache.cached(timeout=3600)(cached)
    return cached()
# Names exported by ``from <module> import *``: the extension object and the
# base class for user-defined sources.
__all__ = ('Sitemap', 'SitemapSource')
# (End of gist; GitHub page footer omitted.)