Skip to content

Instantly share code, notes, and snippets.

@exhuma
Created August 8, 2019 11:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save exhuma/801a8b785fca21ebab8f931fe8b943d1 to your computer and use it in GitHub Desktop.
Save exhuma/801a8b785fca21ebab8f931fe8b943d1 to your computer and use it in GitHub Desktop.
Parser for the "Accept" header in HTTP requests
"""
This module contains helpers to work with the "Accept" request header
"""
import re
from typing import Generator, Iterable, List, NamedTuple, Tuple
class AcceptType(NamedTuple):
    """
    A single entry of a parsed "Accept" header.
    """
    # The media type text, including any parameters that precede "q".
    media_type: str
    # The quality ("q") weight attached to the media type.
    quality: float
# Matches any character that forces a parameter value to be written as a
# quoted-string: double quotes and whitespace, plus ";" and "," which the
# parsers in this module treat as parameter and list separators. Without
# quoting, a value containing one of those could not be parsed back.
P_QUOTE_NEEDED = re.compile(r'["\s;,]')
def parse_mt_args(data: str) -> Generator[Tuple[str, str], None, None]:
    """
    Parse the "parameters" part of a media-type.

    The input is scanned character by character with a small state machine
    (``keyname`` -> ``value`` or ``quoted_value`` -> ``keyname``).
    Whitespace and ``;`` in front of a parameter name are skipped, so the
    separator does not need to be followed by a space. Unquoted values have
    surrounding whitespace removed; quoted values are passed through
    ``unquote_arg_value``.

    A trailing fragment that never reaches a ``=`` is not a parameter and is
    ignored. An unterminated quoted value is returned as-is (without
    unquoting) rather than being dropped.

    :param data: The parameter text, e.g. ``'foo=bar; frob="hello world"'``.
    :return: A generator of ``(key, value)`` tuples in input order.

    >>> list(parse_mt_args('foo=bar; frob="hello world"'))
    [('foo', 'bar'), ('frob', 'hello world')]
    >>> list(parse_mt_args('a=b;c=d'))
    [('a', 'b'), ('c', 'd')]
    """
    state = 'keyname'
    current_key_name = ''
    last_cut = 0
    # True while the previous character inside a quoted value was an
    # unescaped backslash. Tracking this (instead of peeking at the previous
    # character) keeps an escaped backslash right before the closing quote
    # from being mistaken for an escaped quote.
    escaped = False
    for idx, char in enumerate(data):
        if state == 'keyname':
            if char in ' \t\r\n;':
                # Ignore whitespace and separators in front of the name.
                last_cut = idx + 1
            elif char == '=':
                current_key_name = data[last_cut:idx]
                last_cut = idx + 1
                state = 'value'
        elif state == 'value':
            if char == ';':
                state = 'keyname'
                value = data[last_cut:idx].strip()
                # Resume right after the separator; the keyname state skips
                # any whitespace that follows.
                last_cut = idx + 1
                yield current_key_name, value
            elif char == '"':
                state = 'quoted_value'
                last_cut = idx + 1
        elif state == 'quoted_value':
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                state = 'keyname'
                value = unquote_arg_value(data[last_cut:idx])
                last_cut = idx + 1
                yield current_key_name, value
        else:
            raise ValueError('Unexpected parser state!')
    # Flush whatever is left, but only if we were actually inside a value.
    # Leftover text in the "keyname" state has no "=" and must not be
    # reported under the previous parameter's name.
    if last_cut < len(data):
        if state == 'value':
            yield current_key_name, data[last_cut:].strip()
        elif state == 'quoted_value':
            yield current_key_name, data[last_cut:]
def split_accept_types(data: str) -> Generator[str, None, None]:
    """
    Split the value of an "Accept" header into its media-type strings.

    The header is cut at every comma that is not inside a double-quoted
    string. Backslash escapes are only interpreted inside quotes, so an
    escaped quote does not end the quoted string, while an escaped backslash
    directly before the closing quote does not hide it. No special handling
    of the "q" parameter takes place.

    Each item is stripped of surrounding whitespace. Empty items (from an
    empty header, or from leading, trailing or doubled commas) are skipped.

    :param data: The raw header value.
    :return: A generator of media-type strings in header order.

    >>> list(split_accept_types('text/plain; charset=utf8, image/jpeg; q=0.5'))
    ['text/plain; charset=utf8', 'image/jpeg; q=0.5']
    >>> list(split_accept_types(''))
    []
    """
    in_quotes = False
    escaped = False
    last_cut = 0
    for idx, char in enumerate(data):
        if in_quotes:
            if escaped:
                # This character was escaped by the preceding backslash.
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_quotes = False
        elif char == '"':
            in_quotes = True
        elif char == ',':
            item = data[last_cut:idx].strip()
            last_cut = idx + 1
            if item:
                yield item
    item = data[last_cut:].strip()
    if item:
        yield item
def unquote_arg_value(value: str) -> str:
    """
    Undo the escaping of double quotes in a header parameter value.

    Every backslash-escaped double quote is replaced by a plain double
    quote. No other escape sequence is touched, and a value without escaped
    quotes comes back unchanged.

    :param value: The text found between the surrounding quotes.
    :return: The value with escaped double quotes resolved.

    >>> unquote_arg_value('Hello \\"World\\"')
    'Hello "World"'
    """
    # str.replace leaves the text untouched when there is nothing to replace,
    # so no separate membership check is needed.
    return value.replace('\\"', '"')
def quote_arg_value(value: str) -> str:
    """
    Quote a header parameter value when it needs quoting.

    If ``P_QUOTE_NEEDED`` matches anywhere in the value, the value is wrapped
    in double quotes and every embedded double quote is backslash-escaped.
    Otherwise the value is returned unchanged.

    :param value: The raw parameter value.
    :return: The value in a form suitable for a header parameter.

    >>> quote_arg_value('Hello "World"')
    '"Hello \\"World\\""'
    """
    if P_QUOTE_NEEDED.search(value) is None:
        return value
    escaped = value.replace('"', '\\"')
    return '"%s"' % escaped
def parse_accept(data: str) -> Generator[AcceptType, None, None]:
    """
    Parse the value of an "Accept" header.

    The header is split with ``split_accept_types`` and the parameters of
    every element are read with ``parse_mt_args``. Parameters in front of
    the "q" parameter are kept as part of the media type (re-quoted via
    ``quote_arg_value`` where needed). The "q" parameter, matched
    case-insensitively, becomes the quality, and everything after it is
    discarded. Elements without a "q" parameter get a quality of ``1.0``.

    The results are *not* sorted. This can be achieved using
    :py:func:`~.sort_accept`.

    :param data: The raw header value.
    :return: A generator of :py:class:`AcceptType` items in header order.
    :raises ValueError: If a "q" parameter is not a valid float.

    >>> list(parse_accept('text/plain; charset=utf8; q=1, image/jpeg; q=0.5'))
    [AcceptType(media_type='text/plain; charset=utf8', quality=1.0), AcceptType(media_type='image/jpeg', quality=0.5)]
    """
    for mt in split_accept_types(data):
        if not mt:
            # An empty list element (e.g. from an empty header or "a,,b")
            # names no media type.
            continue
        quality = 1.0
        mt_out, _, args_raw = mt.partition(';')
        # Whitespace in front of the first ";" is not part of the media type.
        mt_out = mt_out.strip()
        mt_args = []
        for key, value in parse_mt_args(args_raw):
            if not key:
                # A parameter without a name cannot be written back out.
                continue
            # Compare the whole name: a substring test such as
            # ``key in 'qQ'`` would also accept '' and 'qQ'.
            if key.lower() == 'q':
                quality = float(value)
                break
            mt_args.append((key, value))
        if mt_args:
            str_args = ['%s=%s' % (k, quote_arg_value(v)) for k, v in mt_args]
            mt_out = '%s; %s' % (mt_out, '; '.join(str_args))
        yield AcceptType(mt_out, quality)
def sort_accept(accept_types: Iterable[AcceptType]) -> List[AcceptType]:
    """
    Order AcceptType instances by preference.

    Items are ordered by descending quality first. Among items of equal
    quality, the one with the longer media-type string comes first.

    RFC-7231 section 5.3.2 bases the precedence of multiple "accept" types
    on the specificity of the media-type. That rule is not fully implemented
    here; the character length of the media-type serves as a heuristic for
    specificity instead.

    :param accept_types: The items to sort.
    :return: A new, sorted list; the input is not modified.

    >>> sort_accept([AcceptType('foo/bar', 0.3), AcceptType('bar/baz', 0.5)])
    [AcceptType(media_type='bar/baz', quality=0.5), AcceptType(media_type='foo/bar', quality=0.3)]
    >>> sort_accept([AcceptType('foo/bar', 0.5), AcceptType('bar/frob', 0.5)])
    [AcceptType(media_type='bar/frob', quality=0.5), AcceptType(media_type='foo/bar', quality=0.5)]
    """
    # Negating both components turns the ascending sort into "highest
    # quality first, then longest media type first".
    return sorted(
        accept_types,
        key=lambda item: (-item.quality, -len(item.media_type)),
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment