Skip to content

Instantly share code, notes, and snippets.

@kgriffs
Last active April 24, 2019 03:51
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kgriffs/4fcf96a9ca8968d3c393 to your computer and use it in GitHub Desktop.
Save kgriffs/4fcf96a9ca8968d3c393 to your computer and use it in GitHub Desktop.
URI Template Tree POC
# Copyright 2013 by Rackspace Hosting, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import six
_SIMPLE_PATTERN = re.compile(r'[0-9A-Za-z_~\-]+\Z')
# NOTE(kgriffs): No need to check for pct-encoded, because WSGI servers
# decode those before passing them to us via the PATH_INFO environ field.
_RESERVED_PATTERN = re.compile(
r'[0-9A-Za-z_~\-:/\?#\[\]@\!\$&\'\(\)\*\+,;=]+\Z')
class TemplateError(Exception):
pass
class InvalidExpansion(TemplateError):
pass
class TemplateTree(object):
__slots__ = ('_root')
def __init__(self):
self._root = {}
def insert(self, template, method_map, resource):
node = self._root
assert template.startswith('/')
if template != '/':
assert not template.endswith('/')
# NOTE(kgriffs): Remove initial slash since we want to
# start traversing one level down from the root
parts = template[1:].split('/')
for part in parts:
if part.startswith('{') and part.endswith('}'):
# Field expression
# if part[1] == '+':
# # Level 2 reserved expansion
# pattern = _RESERVED_PATTERN
# else:
# pattern = _SIMPLE_PATTERN
# node['*'] = {'_fname': (part[1:-1], pattern)}
try:
child = node['*']
except KeyError:
child = {}
# Remember the name of the field so we can use it
# as a key in the match dict later on.
child['_fname'] = part[1:-1]
node['*'] = child
# Move to the child node
node = child
else:
lowercased_part = part.lower()
try:
node = node[lowercased_part]
except KeyError:
node[lowercased_part] = {}
node = node[lowercased_part]
node['_tag'] = (method_map, resource)
def match(self, path):
fields = {}
node = self._root
if path != '/':
# NOTE(kgriffs): Remove initial slash since we want to
# start traversing one level down from the root
for part in path[1:].split('/'):
if '*' in node:
# Field expression matches everything; store the part
node = node['*']
name = node['_fname']
# Validate according to expansion type
# TODO(kgriffs): Does this require decoding the
# path, not just qs, in Request.__init__?
# NOT(kgriffs): This is stricter than
# the current parser, also adds a little
# bit of overhead... perhaps let the
# app developer decide if they want
# to enable strict mode? If not, they
# need to still validate the input (may
# happen as a matter of course when converting
# to an int or UUID or something).f
# if pattern.match(part) is None:
# raise InvalidExpansion()
fields[name] = part
else:
try:
node = node[part.lower()]
except KeyError:
# No match
return None
try:
method_map, resource = node['_tag']
except KeyError:
# NOTE(kgriffs): Found an intermediate node that actually
# doesn't correspond to a valid template. This can happen
# when no template was added for this level in the tree,
# but there does exist a template for a lower level.
return None
return (method_map, resource, fields)
# NOTE(kgriffs): This function creates a regex that is used in
# Falcon's current iterative router. The framework will try each
# regex in turn. When it finds a match, it will call pattern.groupdict
# to get a dict of values for any fields that were defined in the
# template. Therefore, when benchmarking, one must take into account
# that finding an invidiual route will take up to (n * M) + G, where
# n is the number of routes, M is the cost to call the regex match()
# method for each route (technically, M will be different for each
# route), and G is the cost to call the regex groupdict() method.
def compile_uri_template(template):
"""Compile the given URI template string into a pattern matcher.
Currently only recognizes Level 1 URI templates, and only for the path
portion of the URI.
See also: http://tools.ietf.org/html/rfc6570
Args:
template: A Level 1 URI template. Method responders must accept, as
arguments, all fields specified in the template (default '/').
Note that field names are restricted to ASCII a-z, A-Z, and
the underscore '_'.
Returns:
(template_field_names, template_regex)
"""
if not isinstance(template, six.string_types):
raise TypeError('uri_template is not a string')
if not template.startswith('/'):
raise ValueError("uri_template must start with '/'")
if '//' in template:
raise ValueError("uri_template may not contain '//'")
if template != '/' and template.endswith('/'):
template = template[:-1]
expression_pattern = r'{([a-zA-Z][a-zA-Z_]*)}'
# Get a list of field names
fields = set(re.findall(expression_pattern, template))
# Convert Level 1 var patterns to equivalent named regex groups
escaped = re.sub(r'[\.\(\)\[\]\?\*\+\^\|]', r'\\\g<0>', template)
pattern = re.sub(expression_pattern, r'(?P<\1>[^/]+)', escaped)
pattern = r'\A' + pattern + r'\Z'
return re.compile(pattern, re.IGNORECASE)
def match_iter(patterns, path):
for pattern in patterns:
m = pattern.match(path)
if m:
return m.groupdict()
return None
tree = TemplateTree()
template = '/{project_id}/queues/{queue_id}'
tree.insert(template, 1, 2)
pattern = compile_uri_template(template)
long_template = '/{project_id}/queues/{queue_id}/some/thing/{else}/foo/{foo}'
tree.insert(long_template, 3, 4)
long_pattern = compile_uri_template(long_template)
path = '/45/queues/5242234'
long_path = '/1234/queues/foo-bar/some/thing/1234523242524243244332/foo/58382-425'
patterns = [pattern, long_pattern]
@kgriffs
Copy link
Author

kgriffs commented Dec 6, 2014

Hmm.. I found a bug in the above. I'll fix it ASAP and upload a new version.

@kgriffs
Copy link
Author

kgriffs commented Dec 6, 2014

Fixed the bug and added regex code for comparison.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment