Skip to content

Instantly share code, notes, and snippets.

Created Aug 8, 2019
What would you like to do?
Parser for the "Accept" header in HTTP requests
"""
This module contains helpers to work with the "Accept" request header.
"""
import re
from typing import Generator, Iterable, List, NamedTuple, Tuple
# One parsed entry of an "Accept" header: the media-type string and its
# quality ("q") weight.
AcceptType = NamedTuple('AcceptType', [
    ('media_type', str),
    ('quality', float),
])

# Matches a double quote or any whitespace character. Judging by its name it
# flags parameter values that must be wrapped in quotes when serialised.
P_QUOTE_NEEDED = re.compile(r'["\s]')
def parse_mt_args(data: str) -> Generator[Tuple[str, str], None, None]:
    """
    Parse the "parameters" part of a media-type.

    The input is the text following the first ``;`` of a media-type. The
    parameters are produced lazily as ``(key, value)`` tuples in the order in
    which they appear.

    >>> list(parse_mt_args('foo=bar; frob="hello world"'))
    [('foo', 'bar'), ('frob', 'hello world')]
    >>> list(parse_mt_args('foo=bar;baz=qux'))
    [('foo', 'bar'), ('baz', 'qux')]

    :param data: The raw parameter string, for example ``charset=utf8; q=1``.
    :return: A generator over ``(key, value)`` tuples. Quoted values are
        passed through ``unquote_arg_value``; unquoted values have their
        surrounding whitespace removed.
    """
    state = 'keyname'
    current_key_name = ''
    last_cut = 0  # index at which the token currently being read starts
    escaped = False  # True right after a backslash inside a quoted value
    for idx, char in enumerate(data):
        if state == 'keyname':
            if char in ' \t\r\n;':
                # Whitespace and stray separators never belong to a key, so
                # the key can only start after them.
                last_cut = idx + 1
            elif char == '=':
                current_key_name = data[last_cut:idx]
                last_cut = idx + 1
                state = 'value'
        elif state == 'value':
            if char == ';':
                yield current_key_name, data[last_cut:idx].strip()
                # Resume directly after the separator. Optional whitespace
                # is skipped by the 'keyname' state, so "a=1;b=2" and
                # "a=1; b=2" parse the same way.
                last_cut = idx + 1
                state = 'keyname'
            elif char == '"':
                state = 'quoted_value'
                last_cut = idx + 1
        elif state == 'quoted_value':
            if escaped:
                # This character was escaped by the preceding backslash and
                # can therefore neither close the value nor escape another.
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                yield current_key_name, unquote_arg_value(data[last_cut:idx])
                last_cut = idx + 1
                state = 'keyname'
        else:
            # Defensive: 'state' is only ever assigned the three names above.
            raise ValueError('Unexpected parser state!')
    if last_cut < len(data):
        if state == 'value':
            # The last parameter is not followed by a ";".
            yield current_key_name, data[last_cut:].strip()
        elif state == 'quoted_value':
            # Unterminated quoted value: hand back the remainder untouched.
            yield current_key_name, data[last_cut:]
        # In the 'keyname' state the remainder is a bare token without "=".
        # It is dropped, just like such tokens in the middle of the string,
        # instead of being reported under the previous parameter's key.
def split_accept_types(data: str) -> Generator[str, None, None]:
    """
    Split an "Accept" header value into its individual media-type strings.

    The header is cut at every comma that is not inside a quoted parameter
    value. Each piece is stripped of surrounding whitespace and otherwise
    returned verbatim, so the "q" parameter receives no special handling.
    Empty list elements (as in ``"a/b,,c/d"`` or an empty header) are
    skipped.

    >>> list(split_accept_types('text/plain; charset=utf8, image/jpeg; q=0.5'))
    ['text/plain; charset=utf8', 'image/jpeg; q=0.5']
    >>> list(split_accept_types(',text/html'))
    ['text/html']

    :param data: The raw header value.
    :return: A generator over the non-empty media-type strings.
    """
    in_quotes = False
    escaped = False  # True right after a backslash inside a quoted value
    last_cut = 0
    for idx, char in enumerate(data):
        if escaped:
            # Escaped character inside quotes: it cannot end the quoting.
            escaped = False
        elif in_quotes and char == '\\':
            escaped = True
        elif char == '"':
            in_quotes = not in_quotes
        elif char == ',' and not in_quotes:
            item = data[last_cut:idx].strip()
            if item:
                yield item
            last_cut = idx + 1
    item = data[last_cut:].strip()
    if item:
        yield item
def unquote_arg_value(value: str) -> str:
    r"""
    Unquote an argument from an RFC-7230 header parameter value.

    *value* is the content of a quoted-string without the surrounding double
    quotes. Every backslash escape (``\"``, ``\\``, ...) is replaced by the
    character following the backslash. A lone backslash at the very end of
    the value is left as it is.

    >>> unquote_arg_value('Hello \\"World\\"')
    'Hello "World"'

    :param value: The text between the quotes of a quoted parameter value.
    :return: The value with all backslash escapes resolved.
    """
    # A single left-to-right pass is needed so that an escaped backslash
    # followed by an escaped quote is decoded correctly.
    return re.sub(r'\\(.)', r'\1', value)
def quote_arg_value(value: str) -> str:
    r"""
    Quote an argument for use as an RFC-7230 header parameter value.

    Values which contain neither a double quote nor whitespace (as matched by
    ``P_QUOTE_NEEDED``) are returned unchanged. All other values are wrapped
    in double quotes, with embedded backslashes and double quotes escaped by
    a backslash.

    >>> quote_arg_value('Hello "World"')
    '"Hello \\"World\\""'
    >>> quote_arg_value('utf8')
    'utf8'

    :param value: The plain (unescaped) parameter value.
    :return: The value in a form that can be placed after ``key=``.
    """
    if not P_QUOTE_NEEDED.search(value):
        return value
    # Backslashes must be escaped first; otherwise the backslashes added for
    # the quotes would be escaped a second time.
    escaped = value.replace('\\', '\\\\').replace('"', '\\"')
    return '"%s"' % escaped
def parse_accept(data: str) -> Generator[AcceptType, None, None]:
    """
    Parse the value of an "Accept" header.

    Each media-type of the header yields exactly one ``AcceptType``. The
    "q" parameter (matched case-insensitively) is removed from the media-type
    and reported as the ``quality`` field; it defaults to ``1.0`` when absent.
    All other parameters are re-attached to the media-type as
    ``key=value`` pairs joined by ``"; "``.

    The results are *not* sorted. This can be achieved using
    :py:func:`~.sort_accept`.

    >>> for item in parse_accept('text/plain; charset=utf8; q=1, image/jpeg; q=0.5'):
    ...     print(item.media_type, item.quality)
    text/plain; charset=utf8 1.0
    image/jpeg 0.5

    :param data: The raw header value.
    :return: A generator over ``AcceptType`` instances in header order.
    :raises ValueError: If a "q" parameter cannot be converted to a float.
    """
    for mt in split_accept_types(data):
        quality = 1.0
        mt_out, _, args_raw = mt.partition(';')
        # Whitespace is allowed in front of the ";" and is not part of the
        # media-type itself.
        mt_out = mt_out.strip()
        mt_args = []
        # An empty "args_raw" simply produces no parameters.
        for key, value in parse_mt_args(args_raw):
            # Compare the whole key: a substring test such as "key in 'qQ'"
            # would also accept an empty key.
            if key.lower() == 'q':
                quality = float(value)
                continue  # the weight is not part of the media-type
            mt_args.append((key, value))
        if mt_args:
            str_args = ['%s=%s' % (k, quote_arg_value(v)) for k, v in mt_args]
            mt_out = '%s; %s' % (mt_out, '; '.join(str_args))
        yield AcceptType(mt_out, quality)
def sort_accept(accept_types: Iterable[AcceptType]) -> List[AcceptType]:
    """
    Sort an iterable of AcceptType instances from most to least preferred.

    Entries are ordered by descending quality. As per RFC-7231 section 5.3.2,
    the precedence of multiple "accept" types relies on the specificity of
    the media-type. This function does not fully implement this, but instead
    uses the character-length of the media-type as an approximation: among
    entries of equal quality the longer media-type string comes first.
    Entries which are equal in both respects keep their input order.

    >>> ordered = sort_accept([AcceptType('foo/bar', 0.3), AcceptType('bar/baz', 0.5)])
    >>> [item.media_type for item in ordered]
    ['bar/baz', 'foo/bar']
    >>> ordered = sort_accept([AcceptType('foo/bar', 0.5), AcceptType('bar/frob', 0.5)])
    >>> [item.media_type for item in ordered]
    ['bar/frob', 'foo/bar']

    :param accept_types: The entries to sort; the iterable is not modified.
    :return: A new list containing the sorted entries.
    """
    def sort_key(value: AcceptType) -> Tuple[float, int]:
        # Negated so that the ascending sort puts the highest quality and
        # the longest media-type first.
        return (-value.quality, -len(value.media_type))

    return sorted(accept_types, key=sort_key)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment