kgriffs/template_tree.py

## template_tree.py
# Copyright 2013 by Rackspace Hosting, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

import six

_SIMPLE_PATTERN = re.compile(r'[0-9A-Za-z_~\-]+\Z')

# NOTE(kgriffs): No need to check for pct-encoded, because WSGI servers
# decode those before passing them to us via the PATH_INFO environ field.
_RESERVED_PATTERN = re.compile(
    r'[0-9A-Za-z_~\-:/\?#\[\]@\!\$&\'\(\)\*\+,;=]+\Z')


class TemplateError(Exception):
    pass


class InvalidExpansion(TemplateError):
    pass


class TemplateTree(object):
    __slots__ = ('_root')

    def __init__(self):
        self._root = {}

    def insert(self, template, method_map, resource):
        node = self._root

        assert template.startswith('/')
        if template != '/':
            assert not template.endswith('/')

            # NOTE(kgriffs): Remove initial slash since we want to
            # start traversing one level down from the root
            parts = template[1:].split('/')
            for part in parts:
                if part.startswith('{') and part.endswith('}'):
                    # Field expression
                    # if part[1] == '+':
                    #     # Level 2 reserved expansion
                    #     pattern = _RESERVED_PATTERN
                    # else:
                    #     pattern = _SIMPLE_PATTERN

                    # node['*'] = {'_fname': (part[1:-1], pattern)}

                    try:
                        child = node['*']
                    except KeyError:
                        child = {}

                    # Remember the name of the field so we can use it
                    # as a key in the match dict later on.
                    child['_fname'] = part[1:-1]
                    node['*'] = child

                    # Move to the child node
                    node = child

                else:
                    lowercased_part = part.lower()

                    try:
                        node = node[lowercased_part]
                    except KeyError:
                        node[lowercased_part] = {}
                        node = node[lowercased_part]

        node['_tag'] = (method_map, resource)

    def match(self, path):
        fields = {}
        node = self._root

        if path != '/':
            # NOTE(kgriffs): Remove initial slash since we want to
            # start traversing one level down from the root
            for part in path[1:].split('/'):
                if '*' in node:
                    # Field expression matches everything; store the part
                    node = node['*']

                    name = node['_fname']

                    # Validate according to expansion type
                    # TODO(kgriffs): Does this require decoding the
                    # path, not just qs, in Request.__init__?
                    # NOT(kgriffs): This is stricter than
                    # the current parser, also adds a little
                    # bit of overhead... perhaps let the
                    # app developer decide if they want
                    # to enable strict mode? If not, they
                    # need to still validate the input (may
                    # happen as a matter of course when converting
                    # to an int or UUID or something).f
                    # if pattern.match(part) is None:
                    #     raise InvalidExpansion()

                    fields[name] = part

                else:
                    try:
                        node = node[part.lower()]
                    except KeyError:
                        # No match
                        return None

        try:
            method_map, resource = node['_tag']
        except KeyError:
            # NOTE(kgriffs): Found an intermediate node that actually
            # doesn't correspond to a valid template. This can happen
            # when no template was added for this level in the tree,
            # but there does exist a template for a lower level.
            return None

        return (method_map, resource, fields)


# NOTE(kgriffs): This function creates a regex that is used in
# Falcon's current iterative router. The framework will try each
# regex in turn. When it finds a match, it will call pattern.groupdict
# to get a dict of values for any fields that were defined in the
# template. Therefore, when benchmarking, one must take into account
# that finding an invidiual route will take up to (n * M) + G, where
# n is the number of routes, M is the cost to call the regex match()
# method for each route (technically, M will be different for each
# route), and G is the cost to call the regex groupdict() method.
def compile_uri_template(template):
    """Compile the given URI template string into a pattern matcher.

    Currently only recognizes Level 1 URI templates, and only for the path
    portion of the URI.

    See also: http://tools.ietf.org/html/rfc6570

    Args:
        template: A Level 1 URI template. Method responders must accept, as
            arguments, all fields specified in the template (default '/').
            Note that field names are restricted to ASCII a-z, A-Z, and
            the underscore '_'.

    Returns:
        (template_field_names, template_regex)

    """

    if not isinstance(template, six.string_types):
        raise TypeError('uri_template is not a string')

    if not template.startswith('/'):
        raise ValueError("uri_template must start with '/'")

    if '//' in template:
        raise ValueError("uri_template may not contain '//'")

    if template != '/' and template.endswith('/'):
        template = template[:-1]

    expression_pattern = r'{([a-zA-Z][a-zA-Z_]*)}'

    # Get a list of field names
    fields = set(re.findall(expression_pattern, template))

    # Convert Level 1 var patterns to equivalent named regex groups
    escaped = re.sub(r'[\.\(\)\[\]\?\*\+\^\|]', r'\\\g<0>', template)
    pattern = re.sub(expression_pattern, r'(?P<\1>[^/]+)', escaped)
    pattern = r'\A' + pattern + r'\Z'

    return re.compile(pattern, re.IGNORECASE)


def match_iter(patterns, path):
    for pattern in patterns:
        m = pattern.match(path)
        if m:
            return m.groupdict()

    return None


tree = TemplateTree()
template = '/{project_id}/queues/{queue_id}'
tree.insert(template, 1, 2)
pattern = compile_uri_template(template)

long_template = '/{project_id}/queues/{queue_id}/some/thing/{else}/foo/{foo}'
tree.insert(long_template, 3, 4)
long_pattern = compile_uri_template(long_template)

path = '/45/queues/5242234'
long_path = '/1234/queues/foo-bar/some/thing/1234523242524243244332/foo/58382-425'

patterns = [pattern, long_pattern]
	# Copyright 2013 by Rackspace Hosting, Inc.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import re

	import six

	_SIMPLE_PATTERN = re.compile(r'[0-9A-Za-z_~\-]+\Z')

	# NOTE(kgriffs): No need to check for pct-encoded, because WSGI servers
	# decode those before passing them to us via the PATH_INFO environ field.
	_RESERVED_PATTERN = re.compile(
	r'[0-9A-Za-z_~\-:/\?#\[\]@\!\$&\'\(\)\*\+,;=]+\Z')


	class TemplateError(Exception):
	pass


	class InvalidExpansion(TemplateError):
	pass


	class TemplateTree(object):
	__slots__ = ('_root')

	def __init__(self):
	self._root = {}

	def insert(self, template, method_map, resource):
	node = self._root

	assert template.startswith('/')
	if template != '/':
	assert not template.endswith('/')

	# NOTE(kgriffs): Remove initial slash since we want to
	# start traversing one level down from the root
	parts = template[1:].split('/')
	for part in parts:
	if part.startswith('{') and part.endswith('}'):
	# Field expression
	# if part[1] == '+':
	# # Level 2 reserved expansion
	# pattern = _RESERVED_PATTERN
	# else:
	# pattern = _SIMPLE_PATTERN

	# node['*'] = {'_fname': (part[1:-1], pattern)}

	try:
	child = node['*']
	except KeyError:
	child = {}

	# Remember the name of the field so we can use it
	# as a key in the match dict later on.
	child['_fname'] = part[1:-1]
	node['*'] = child

	# Move to the child node
	node = child

	else:
	lowercased_part = part.lower()

	try:
	node = node[lowercased_part]
	except KeyError:
	node[lowercased_part] = {}
	node = node[lowercased_part]

	node['_tag'] = (method_map, resource)

	def match(self, path):
	fields = {}
	node = self._root

	if path != '/':
	# NOTE(kgriffs): Remove initial slash since we want to
	# start traversing one level down from the root
	for part in path[1:].split('/'):
	if '*' in node:
	# Field expression matches everything; store the part
	node = node['*']

	name = node['_fname']

	# Validate according to expansion type
	# TODO(kgriffs): Does this require decoding the
	# path, not just qs, in Request.__init__?
	# NOT(kgriffs): This is stricter than
	# the current parser, also adds a little
	# bit of overhead... perhaps let the
	# app developer decide if they want
	# to enable strict mode? If not, they
	# need to still validate the input (may
	# happen as a matter of course when converting
	# to an int or UUID or something).f
	# if pattern.match(part) is None:
	# raise InvalidExpansion()

	fields[name] = part

	else:
	try:
	node = node[part.lower()]
	except KeyError:
	# No match
	return None

	try:
	method_map, resource = node['_tag']
	except KeyError:
	# NOTE(kgriffs): Found an intermediate node that actually
	# doesn't correspond to a valid template. This can happen
	# when no template was added for this level in the tree,
	# but there does exist a template for a lower level.
	return None

	return (method_map, resource, fields)


	# NOTE(kgriffs): This function creates a regex that is used in
	# Falcon's current iterative router. The framework will try each
	# regex in turn. When it finds a match, it will call pattern.groupdict
	# to get a dict of values for any fields that were defined in the
	# template. Therefore, when benchmarking, one must take into account
	# that finding an invidiual route will take up to (n * M) + G, where
	# n is the number of routes, M is the cost to call the regex match()
	# method for each route (technically, M will be different for each
	# route), and G is the cost to call the regex groupdict() method.
	def compile_uri_template(template):
	"""Compile the given URI template string into a pattern matcher.

	Currently only recognizes Level 1 URI templates, and only for the path
	portion of the URI.

	See also: http://tools.ietf.org/html/rfc6570

	Args:
	template: A Level 1 URI template. Method responders must accept, as
	arguments, all fields specified in the template (default '/').
	Note that field names are restricted to ASCII a-z, A-Z, and
	the underscore '_'.

	Returns:
	(template_field_names, template_regex)

	"""

	if not isinstance(template, six.string_types):
	raise TypeError('uri_template is not a string')

	if not template.startswith('/'):
	raise ValueError("uri_template must start with '/'")

	if '//' in template:
	raise ValueError("uri_template may not contain '//'")

	if template != '/' and template.endswith('/'):
	template = template[:-1]

	expression_pattern = r'{([a-zA-Z][a-zA-Z_]*)}'

	# Get a list of field names
	fields = set(re.findall(expression_pattern, template))

	# Convert Level 1 var patterns to equivalent named regex groups
	escaped = re.sub(r'[\.\(\)\[\]\?\*\+\^\\|]', r'\\\g<0>', template)
	pattern = re.sub(expression_pattern, r'(?P<\1>[^/]+)', escaped)
	pattern = r'\A' + pattern + r'\Z'

	return re.compile(pattern, re.IGNORECASE)


	def match_iter(patterns, path):
	for pattern in patterns:
	m = pattern.match(path)
	if m:
	return m.groupdict()

	return None


	tree = TemplateTree()
	template = '/{project_id}/queues/{queue_id}'
	tree.insert(template, 1, 2)
	pattern = compile_uri_template(template)

	long_template = '/{project_id}/queues/{queue_id}/some/thing/{else}/foo/{foo}'
	tree.insert(long_template, 3, 4)
	long_pattern = compile_uri_template(long_template)

	path = '/45/queues/5242234'
	long_path = '/1234/queues/foo-bar/some/thing/1234523242524243244332/foo/58382-425'

	patterns = [pattern, long_pattern]