Created
August 8, 2019 11:45
-
-
Save exhuma/801a8b785fca21ebab8f931fe8b943d1 to your computer and use it in GitHub Desktop.
Parser for the "Accept" header in HTTP requests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This module contains helpers to work with the "Accept" request header | |
""" | |
import re | |
from typing import Generator, Iterable, List, NamedTuple, Tuple | |
class AcceptType(NamedTuple):
    """
    One entry of a parsed "Accept" header.
    """
    # The media-type, including any parameters that are kept with it.
    media_type: str
    # The relative preference ("q" value) attached to the media-type.
    quality: float


# Matches the characters (a double-quote or any whitespace) whose presence
# makes "quote_arg_value" wrap a parameter value in double-quotes.
P_QUOTE_NEEDED = re.compile(r'["\s]')
def parse_mt_args(data: str) -> Generator[Tuple[str, str], None, None]:
    """
    Parse the "parameters" part of a media-type.

    The input is scanned character by character with a small state machine
    ("keyname" -> "value" / "quoted_value" -> "keyname"). Parameters are
    separated by ";" and each one is yielded as soon as it is complete.

    Whitespace around a parameter name and around an *unquoted* value is
    dropped. Quoted values are passed through ``unquote_arg_value``. Text
    that never reaches a "=" (for example a trailing ``; junk``) is not a
    parameter and is ignored.

    :param data: The text following the first ";" of a media-type.
    :return: A generator of ``(key, value)`` tuples in input order.

    >>> list(parse_mt_args('foo=bar; frob="hello world"'))
    [('foo', 'bar'), ('frob', 'hello world')]
    >>> list(parse_mt_args('foo=bar;baz=qux'))
    [('foo', 'bar'), ('baz', 'qux')]
    """
    state = 'keyname'
    current_key_name = ''
    last_cut = 0
    escaped = False  # True when the previous quoted char was a backslash
    for idx, char in enumerate(data):
        if state == 'keyname':
            if char == ';':
                # Anything collected so far was not a "key=value" pair.
                last_cut = idx + 1
            elif char == '=':
                current_key_name = data[last_cut:idx].strip()
                last_cut = idx + 1
                state = 'value'
        elif state == 'value':
            if char == ';':
                value = data[last_cut:idx].strip()
                # Resume right behind the separator; a following space is
                # optional, so it must not be skipped blindly.
                last_cut = idx + 1
                state = 'keyname'
                yield current_key_name, value
            elif char == '"':
                last_cut = idx + 1
                state = 'quoted_value'
        elif state == 'quoted_value':
            if escaped:
                # This character is the payload of a backslash escape.
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                value = unquote_arg_value(data[last_cut:idx])
                last_cut = idx + 1
                state = 'keyname'
                yield current_key_name, value
        else:
            raise ValueError('Unexpected parser state!')
    # The last parameter is usually not terminated by ";". Only emit it when
    # we are actually inside a value; leftovers in the "keyname" state have
    # no key of their own.
    if state != 'keyname' and last_cut < len(data):
        tail = data[last_cut:]
        if state == 'value':
            tail = tail.strip()
        yield current_key_name, tail
def split_accept_types(data: str) -> Generator[str, None, None]:
    """
    Split an "Accept" header into its individual media-type strings.

    The header is cut at every comma that is not inside a double-quoted
    string. Inside quotes a backslash escapes the following character, so an
    escaped quote does not end the quoted string. No special handling of the
    "q" parameter takes place.

    Each element is stripped of surrounding whitespace. Empty elements (as
    in ``"a/b,, c/d"`` or an empty header) are skipped.

    :param data: The raw header value.
    :return: A generator over the media-type strings in header order.

    >>> list(split_accept_types('text/plain; charset=utf8, image/jpeg; q=0.5'))
    ['text/plain; charset=utf8', 'image/jpeg; q=0.5']
    >>> list(split_accept_types(''))
    []
    """
    in_quotes = False
    escaped = False  # True when the previous quoted char was a backslash
    last_cut = 0
    for idx, char in enumerate(data):
        if in_quotes:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_quotes = False
        elif char == '"':
            in_quotes = True
        elif char == ',':
            element = data[last_cut:idx].strip()
            last_cut = idx + 1
            if element:
                yield element
    element = data[last_cut:].strip()
    if element:
        yield element
def unquote_arg_value(value: str) -> str:
    r"""
    Undo the quote-escaping inside a quoted header parameter value.

    Every backslash-escaped double-quote (``\"``) is replaced by a plain
    double-quote. All other characters, including other backslashes, are
    returned untouched. The surrounding quotes are expected to have been
    removed by the caller already.

    >>> unquote_arg_value('Hello \\"World\\"')
    'Hello "World"'
    """
    escaped_quote = '\\"'
    plain_quote = '"'
    # str.replace returns the text unchanged when nothing matches.
    return value.replace(escaped_quote, plain_quote)
def quote_arg_value(value: str) -> str:
    r"""
    Quote a value for use as a header parameter value, if necessary.

    A value matching ``P_QUOTE_NEEDED`` (it contains a double-quote or
    whitespace) is wrapped in double-quotes, with every embedded double-quote
    escaped by a backslash. Any other value is returned unchanged.

    >>> quote_arg_value('Hello "World"')
    '"Hello \\"World\\""'
    >>> quote_arg_value('utf8')
    'utf8'
    """
    needs_quotes = P_QUOTE_NEEDED.search(value)
    if not needs_quotes:
        return value
    escaped = value.replace('"', '\\"')
    return '"%s"' % escaped
def parse_accept(data: str) -> Generator[AcceptType, None, None]:
    """
    Parse the value of an "Accept" header into (media-type, quality) tuples.

    The header is split with :py:func:`~.split_accept_types` and the
    parameters of every element are read with :py:func:`~.parse_mt_args`.
    Parameters in front of the "q" parameter stay part of the media-type
    (re-serialised as ``key=value`` and joined with ``"; "``). The "q"
    parameter (matched case-insensitively) becomes the quality and anything
    following it is ignored. Without a "q" parameter the quality is ``1.0``.

    The results are *not* sorted. This can be achieved using
    :py:func:`~.sort_accept`.

    :param data: The raw header value.
    :return: A generator over :py:class:`AcceptType` items in header order.
    :raises ValueError: While iterating, if a "q" value is not accepted by
        ``float``.

    >>> list(parse_accept('text/plain; charset=utf8; q=1, image/jpeg; q=0.5'))
    ... # doctest: +NORMALIZE_WHITESPACE
    [AcceptType(media_type='text/plain; charset=utf8', quality=1.0),
     AcceptType(media_type='image/jpeg', quality=0.5)]
    """
    for mt in split_accept_types(data):
        quality = 1.0
        mt_out, _, args_raw = mt.partition(';')
        # Whitespace in front of the first ";" is not part of the media-type.
        mt_out = mt_out.strip()
        if not args_raw:
            yield AcceptType(mt_out, quality)
            continue
        mt_args = []
        for key, value in parse_mt_args(args_raw):
            # Compare the whole name: a substring test against 'qQ' would
            # also match an empty parameter name.
            if key.lower() == 'q':
                quality = float(value)
                break
            mt_args.append((key, value))
        if mt_args:
            str_args = ['%s=%s' % (k, quote_arg_value(v)) for k, v in mt_args]
            mt_out = '%s; %s' % (mt_out, '; '.join(str_args))
        yield AcceptType(mt_out, quality)
def sort_accept(accept_types: Iterable[AcceptType]) -> List[AcceptType]:
    """
    Return the given AcceptType instances ordered by preference.

    Entries are ordered by descending quality. Entries of equal quality are
    ordered from the longest to the shortest media-type string; entries that
    tie on both keep their input order.

    As per RFC-7231 section 5.3.2, the precedence of multiple "accept" types
    relies on the specificity of the media-type. This function does not fully
    implement this, but instead uses the character-length of the media-type
    as heuristic.

    >>> sort_accept([AcceptType('foo/bar', 0.3), AcceptType('bar/baz', 0.5)])
    ... # doctest: +NORMALIZE_WHITESPACE
    [AcceptType(media_type='bar/baz', quality=0.5),
     AcceptType(media_type='foo/bar', quality=0.3)]
    >>> sort_accept([AcceptType('foo/bar', 0.5), AcceptType('bar/frob', 0.5)])
    ... # doctest: +NORMALIZE_WHITESPACE
    [AcceptType(media_type='bar/frob', quality=0.5),
     AcceptType(media_type='foo/bar', quality=0.5)]
    """
    ordered = list(accept_types)
    # Negated keys turn the ascending sort into "highest quality first,
    # longest media-type first".
    ordered.sort(key=lambda item: (-item.quality, -len(item.media_type)))
    return ordered
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment