Last active
April 24, 2019 03:51
-
-
Save kgriffs/4fcf96a9ca8968d3c393 to your computer and use it in GitHub Desktop.
URI Template Tree POC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2013 by Rackspace Hosting, Inc. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import re | |
import six | |
_SIMPLE_PATTERN = re.compile(r'[0-9A-Za-z_~\-]+\Z') | |
# NOTE(kgriffs): No need to check for pct-encoded, because WSGI servers | |
# decode those before passing them to us via the PATH_INFO environ field. | |
_RESERVED_PATTERN = re.compile( | |
r'[0-9A-Za-z_~\-:/\?#\[\]@\!\$&\'\(\)\*\+,;=]+\Z') | |
class TemplateError(Exception): | |
pass | |
class InvalidExpansion(TemplateError): | |
pass | |
class TemplateTree(object): | |
__slots__ = ('_root') | |
def __init__(self): | |
self._root = {} | |
def insert(self, template, method_map, resource): | |
node = self._root | |
assert template.startswith('/') | |
if template != '/': | |
assert not template.endswith('/') | |
# NOTE(kgriffs): Remove initial slash since we want to | |
# start traversing one level down from the root | |
parts = template[1:].split('/') | |
for part in parts: | |
if part.startswith('{') and part.endswith('}'): | |
# Field expression | |
# if part[1] == '+': | |
# # Level 2 reserved expansion | |
# pattern = _RESERVED_PATTERN | |
# else: | |
# pattern = _SIMPLE_PATTERN | |
# node['*'] = {'_fname': (part[1:-1], pattern)} | |
try: | |
child = node['*'] | |
except KeyError: | |
child = {} | |
# Remember the name of the field so we can use it | |
# as a key in the match dict later on. | |
child['_fname'] = part[1:-1] | |
node['*'] = child | |
# Move to the child node | |
node = child | |
else: | |
lowercased_part = part.lower() | |
try: | |
node = node[lowercased_part] | |
except KeyError: | |
node[lowercased_part] = {} | |
node = node[lowercased_part] | |
node['_tag'] = (method_map, resource) | |
def match(self, path): | |
fields = {} | |
node = self._root | |
if path != '/': | |
# NOTE(kgriffs): Remove initial slash since we want to | |
# start traversing one level down from the root | |
for part in path[1:].split('/'): | |
if '*' in node: | |
# Field expression matches everything; store the part | |
node = node['*'] | |
name = node['_fname'] | |
# Validate according to expansion type | |
# TODO(kgriffs): Does this require decoding the | |
# path, not just qs, in Request.__init__? | |
# NOT(kgriffs): This is stricter than | |
# the current parser, also adds a little | |
# bit of overhead... perhaps let the | |
# app developer decide if they want | |
# to enable strict mode? If not, they | |
# need to still validate the input (may | |
# happen as a matter of course when converting | |
# to an int or UUID or something).f | |
# if pattern.match(part) is None: | |
# raise InvalidExpansion() | |
fields[name] = part | |
else: | |
try: | |
node = node[part.lower()] | |
except KeyError: | |
# No match | |
return None | |
try: | |
method_map, resource = node['_tag'] | |
except KeyError: | |
# NOTE(kgriffs): Found an intermediate node that actually | |
# doesn't correspond to a valid template. This can happen | |
# when no template was added for this level in the tree, | |
# but there does exist a template for a lower level. | |
return None | |
return (method_map, resource, fields) | |
# NOTE(kgriffs): This function creates a regex that is used in | |
# Falcon's current iterative router. The framework will try each | |
# regex in turn. When it finds a match, it will call pattern.groupdict | |
# to get a dict of values for any fields that were defined in the | |
# template. Therefore, when benchmarking, one must take into account | |
# that finding an invidiual route will take up to (n * M) + G, where | |
# n is the number of routes, M is the cost to call the regex match() | |
# method for each route (technically, M will be different for each | |
# route), and G is the cost to call the regex groupdict() method. | |
def compile_uri_template(template): | |
"""Compile the given URI template string into a pattern matcher. | |
Currently only recognizes Level 1 URI templates, and only for the path | |
portion of the URI. | |
See also: http://tools.ietf.org/html/rfc6570 | |
Args: | |
template: A Level 1 URI template. Method responders must accept, as | |
arguments, all fields specified in the template (default '/'). | |
Note that field names are restricted to ASCII a-z, A-Z, and | |
the underscore '_'. | |
Returns: | |
(template_field_names, template_regex) | |
""" | |
if not isinstance(template, six.string_types): | |
raise TypeError('uri_template is not a string') | |
if not template.startswith('/'): | |
raise ValueError("uri_template must start with '/'") | |
if '//' in template: | |
raise ValueError("uri_template may not contain '//'") | |
if template != '/' and template.endswith('/'): | |
template = template[:-1] | |
expression_pattern = r'{([a-zA-Z][a-zA-Z_]*)}' | |
# Get a list of field names | |
fields = set(re.findall(expression_pattern, template)) | |
# Convert Level 1 var patterns to equivalent named regex groups | |
escaped = re.sub(r'[\.\(\)\[\]\?\*\+\^\|]', r'\\\g<0>', template) | |
pattern = re.sub(expression_pattern, r'(?P<\1>[^/]+)', escaped) | |
pattern = r'\A' + pattern + r'\Z' | |
return re.compile(pattern, re.IGNORECASE) | |
def match_iter(patterns, path): | |
for pattern in patterns: | |
m = pattern.match(path) | |
if m: | |
return m.groupdict() | |
return None | |
tree = TemplateTree() | |
template = '/{project_id}/queues/{queue_id}' | |
tree.insert(template, 1, 2) | |
pattern = compile_uri_template(template) | |
long_template = '/{project_id}/queues/{queue_id}/some/thing/{else}/foo/{foo}' | |
tree.insert(long_template, 3, 4) | |
long_pattern = compile_uri_template(long_template) | |
path = '/45/queues/5242234' | |
long_path = '/1234/queues/foo-bar/some/thing/1234523242524243244332/foo/58382-425' | |
patterns = [pattern, long_pattern] |
Fixed the bug and added regex code for comparison.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hmm.. I found a bug in the above. I'll fix it ASAP and upload a new version.