Created
October 6, 2015 12:54
-
-
Save Atorich/9beabffdd6aa8cdbab7d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
import datetime | |
from flask import Blueprint, render_template, make_response, url_for, abort, \ | |
current_app as app | |
from werkzeug.local import LocalProxy | |
from werkzeug.routing import Rule | |
# Blueprint that carries the sitemap routes and their templates.
blueprint = Blueprint('sitemap', __name__, template_folder='templates')

# Change-frequency values that can be attached to a sitemap item.
CHANGEFREQ_ALWAYS = 'always'
CHANGEFREQ_HOURLY = 'hourly'
CHANGEFREQ_DAILY = 'daily'
CHANGEFREQ_WEEKLY = 'weekly'
CHANGEFREQ_MONTHLY = 'monthly'
CHANGEFREQ_YEARLY = 'yearly'
CHANGEFREQ_NEVER = 'never'

# Naive UTC timestamp captured once, when this module is imported.
now = datetime.datetime.utcnow()

# Values used for any of these keys that the Flask app config does not set.
default_config = dict(
    SITEMAP_MAX_PER_GROUP=500,
    SITEMAP_INDEX_PAGE_ENDPOINT='index',
    SITEMAP_INDEX_PAGE_PRIORITY=1,
)
class SitemapGroupOverflowError(Exception):
    """
    Exception raised when an item is appended to a sitemap group
    that already holds its maximum number of items.
    """
class SitemapIndex(object):
    """
    Class represents sitemap index.

    Holds an ordered list of groups. Items are appended to the most
    recent group; a new group is opened when there is none yet or when
    the current one reports that it is full.
    """
    __slots__ = ('_groups',)

    def __init__(self, groups=None):
        """
        :param groups: optional initial list of groups; a new empty list
            is used when it is omitted or empty.
        """
        self._groups = groups or []

    @property
    def last_group(self):
        """
        Most recently added group.

        :raises IndexError: when the index has no groups yet.
        """
        return self._groups[-1]

    def _create_next_group(self):
        """
        Build the group that follows the existing ones.

        The group is only created here; the caller adds it to the index.
        Its URL uses the current group count as the group number.
        """
        return SitemapGroup(
            loc=url_for(
                'sitemap.sitemap_group',
                group=len(self),
                _external=True),
            # Use the time of generation rather than the module-import
            # time, which goes stale in a long-running process.
            lastmod=datetime.datetime.utcnow()
        )

    def append(self, o):
        """
        Add ``o`` to the last group, opening a new group when needed.

        :param o: the item to store.
        :returns: the group that received the item.
        :raises SitemapGroupOverflowError: if even a freshly created
            group refuses the item.
        """
        if self._groups:
            group = self.last_group
            try:
                group.append(o)
            except SitemapGroupOverflowError:
                # Current group is full: fall through and open a new one.
                pass
            else:
                return group

        group = self._create_next_group()
        self._groups.append(group)
        # No retry loop: a failure on a brand-new group is propagated
        # instead of recursing without bound.
        group.append(o)
        return group

    def __len__(self):
        """Number of groups in the index."""
        return len(self._groups)

    def __iter__(self):
        """Iterate over the groups in insertion order."""
        return iter(self._groups)

    def __getitem__(self, item):
        """Return the group at position ``item``, or None if out of range."""
        try:
            return self._groups[item]
        except IndexError:
            return None
class SitemapGroup(object):
    """
    Class represents sitemap group: one sitemap file listed in the
    index, together with the items it contains.
    See http://www.sitemaps.org/protocol.html
    """
    # Upper bound on the number of items a single group accepts.
    MAX_PER_GROUP = 50000
    __slots__ = ('loc', 'lastmod', '_items')

    def __init__(self, loc, lastmod, items=None):
        """
        :param loc: URL of this group's sitemap file.
        :param lastmod: ``datetime.datetime`` of the last modification;
            a naive value is taken to be UTC, an aware one is converted.
        :param items: optional initial list of items; a new empty list
            is used when it is omitted or empty.
        """
        assert isinstance(lastmod, datetime.datetime)
        self.loc = loc
        offset = lastmod.utcoffset()
        if offset is not None:
            # Aware datetime: shift to UTC so the "Z" suffix is truthful.
            lastmod = (lastmod - offset).replace(tzinfo=None)
        # W3C Datetime needs the "T" separator; "%s" formatting of a
        # datetime produces a space instead, which is not valid there.
        self.lastmod = lastmod.replace(microsecond=0).isoformat() + 'Z'
        self._items = items or []

    def append(self, o):
        """
        Store ``o`` in this group.

        :raises SitemapGroupOverflowError: when the group already holds
            ``MAX_PER_GROUP`` items.
        """
        if len(self) < self.MAX_PER_GROUP:
            self._items.append(o)
        else:
            raise SitemapGroupOverflowError

    def __len__(self):
        """Number of items in the group."""
        return len(self._items)

    def __iter__(self):
        """Iterate over the items in insertion order."""
        return iter(self._items)
class SitemapItem(object):
    """
    Class represents sitemap item: a single URL entry with its
    optional metadata.
    See http://www.sitemaps.org/protocol.html
    """
    __slots__ = ('loc', 'lastmod', 'priority', 'changefreq')

    def __init__(self, loc, lastmod, priority=None, changefreq=None):
        """
        :param loc: the URL as a string, or a ``werkzeug.routing.Rule``
            whose endpoint is resolved to an external URL.
        :param lastmod: ``datetime.datetime`` of the last modification;
            a naive value is taken to be UTC, an aware one is converted.
        :param priority: optional priority value, stored as given.
        :param changefreq: optional change frequency, stored as given.
        :raises ValueError: when ``loc`` is neither a string nor a Rule.
        """
        assert isinstance(lastmod, datetime.datetime)

        # ``basestring`` exists only on Python 2; fall back to ``str``.
        try:
            string_types = basestring  # noqa: F821
        except NameError:
            string_types = str

        if isinstance(loc, string_types):
            self.loc = loc
        elif isinstance(loc, Rule):
            self.loc = url_for(loc.endpoint, _external=True)
        else:
            raise ValueError(
                "Loc should be an instance of string or werkzeug.Rule"
            )

        offset = lastmod.utcoffset()
        if offset is not None:
            # Aware datetime: shift to UTC so the "Z" suffix is truthful.
            lastmod = (lastmod - offset).replace(tzinfo=None)
        # W3C Datetime needs the "T" separator; "%s" formatting of a
        # datetime produces a space instead, which is not valid there.
        self.lastmod = lastmod.replace(microsecond=0).isoformat() + 'Z'
        self.priority = priority
        self.changefreq = changefreq
class SitemapSource(object):
    """
    Class represents sitemap source like Post, User etc.
    Encapsulates data retrieving and pre-processing.

    Subclasses provide the three hooks below; the base versions only
    raise NotImplementedError.
    """
    # Metadata copied onto every item built from this source.
    priority = None
    changefreq = None

    def items(self):
        """Provide the objects this source contributes to the sitemap."""
        raise NotImplementedError()

    def lastmod(self, o):
        """Provide the last-modification datetime of object ``o``."""
        raise NotImplementedError()

    def url(self, o):
        """Provide the location (URL) of object ``o``."""
        raise NotImplementedError()
class Sitemap(object):
    """
    Flask extension object: keeps the sitemap settings and the
    registered sources, and builds the sitemap index from them.
    """
    # Class used by build_index() to create the index.
    index_cls = SitemapIndex
    # Class-level defaults; every instance works on its own copies.
    config = {}
    sources = {}

    def __init__(self, app=None):
        """
        :param app: Flask application to bind to right away; pass None
            and call init_app() later to bind lazily.
        """
        # Copy the class-level dicts so that instances do not share
        # (and overwrite) each other's settings and sources.
        self.config = dict(self.config)
        self.sources = dict(self.sources)
        self.app = app
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """
        Read the settings from ``app.config`` (falling back to
        ``default_config``), store this object under
        ``app.extensions['sitemap']`` and register the blueprint.
        """
        # NOTE(review): SITEMAP_MAX_PER_GROUP is stored here, but nothing
        # in this class reads it back -- confirm whether it should
        # drive the group size.
        for key, default in default_config.items():
            self.config[key] = app.config.get(key, default)
        app.extensions['sitemap'] = self
        self.register_blueprint(app)

    def register_blueprint(self, app):
        """Register the sitemap blueprint on ``app``."""
        app.register_blueprint(blueprint)

    def register_source(self, source):
        """
        Register a SitemapSource instance.

        Sources are keyed by their type, so registering a second
        instance of the same class replaces the first.
        """
        assert isinstance(source, SitemapSource)
        self.sources[type(source)] = source

    @staticmethod
    def build_item_from_source(source, o):
        """Build the SitemapItem describing object ``o`` of ``source``."""
        assert isinstance(source, SitemapSource)
        return SitemapItem(
            loc=source.url(o),
            lastmod=source.lastmod(o),
            priority=source.priority,
            changefreq=source.changefreq
        )

    def build_index(self):
        """
        Build the complete index: the main page first, then every
        object of every registered source.

        :returns: a populated ``index_cls`` instance.
        """
        index = self.index_cls()

        # main page; stamped with the time of generation rather than the
        # module-import time, which goes stale in a long-running process
        index.append(SitemapItem(
            loc=url_for(
                self.config['SITEMAP_INDEX_PAGE_ENDPOINT'], _external=True
            ),
            priority=self.config['SITEMAP_INDEX_PAGE_PRIORITY'],
            lastmod=datetime.datetime.utcnow(),
            changefreq=CHANGEFREQ_DAILY
        ))

        for source in self.sources.values():
            objects = source.items
            # SitemapSource declares items() as a method, so it has to
            # be called; a plain attribute/property is still accepted.
            if callable(objects):
                objects = objects()
            for obj in objects:
                index.append(self.build_item_from_source(source, obj))
        return index
def _get_cache():
    """
    Return a lazy proxy to the cache object.

    Each time the proxy is resolved it tries ``from app import cache``;
    when that import fails, a no-op stand-in is used instead.
    """
    class DummyCache(object):
        """Stand-in whose ``cached`` decorator does no caching."""

        def cached(self, *args, **kwargs):
            """
            Accept the arguments of a real ``cached(...)`` call and
            return a decorator that hands the function back unchanged.

            Returning None here would make ``@cache.cached(...)`` fail
            with "'NoneType' object is not callable".
            """
            def passthrough(func):
                return func
            return passthrough

        @property
        def cache(self):
            return self

    def proxy_func():
        try:
            from app import cache
        except ImportError:
            return DummyCache()
        return cache

    return LocalProxy(proxy_func)
def sitemap_response(response_str):
    """
    Turn rendered sitemap markup into a response whose
    Content-Type header is ``application/xml``.
    """
    xml_response = make_response(response_str)
    xml_response.headers["Content-Type"] = "application/xml"
    return xml_response
# Proxy to the cache object produced by _get_cache().
_cache = _get_cache()

# Proxy to the object stored under the current app's 'sitemap' extension key.
_sitemap = LocalProxy(lambda: app.extensions['sitemap'])
@blueprint.route('/sitemap.xml', methods=['GET'])
def sitemap_index():
    """
    Serve the sitemap index as XML.

    The rendering is wrapped in the cache decorator with a timeout of
    3600.
    """
    @_cache.cached(timeout=3600)
    def cached(*args, **kwargs):
        xml = render_template(
            'sitemap/index.xml',
            index=_sitemap.build_index()
        )
        return sitemap_response(xml)

    return cached()
@blueprint.route('/sitemap.<int:group>.xml')
def sitemap_group(group):
    """
    Serve one sitemap group as XML.

    :param group: position of the group inside the index.

    Aborts with 404 when the index has no group at that position. The
    rendering is wrapped in the cache decorator with a timeout of 3600.
    """
    @_cache.cached(timeout=3600)
    def cached(*args, **kwargs):
        index = _sitemap.build_index()
        group_obj = index[group]
        # The index yields None for an unknown position. Test for None
        # explicitly: the group type defines __len__, so a plain
        # truthiness test would also reject an existing empty group.
        if group_obj is None:
            abort(404)
        return sitemap_response(
            render_template('sitemap/group.xml', group=group_obj)
        )

    return cached()
# Names exported by ``from <module> import *``.
__all__ = (
    'Sitemap',
    'SitemapSource',
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment