exhuma/accept.py

## accept.py
"""
This module contains helpers for parsing and sorting the value of the HTTP
"Accept" request header.
"""
import re
from typing import Generator, Iterable, List, NamedTuple, Tuple

# Record type produced by the helpers in this module: a media-type string
# paired with its quality ("q") weight.
AcceptType = NamedTuple('AcceptType', [
    ('media_type', str),
    ('quality', float),
])

# Matches a double quote or any whitespace character, i.e. characters that
# cannot appear in an unquoted parameter value.
P_QUOTE_NEEDED = re.compile(r'["\s]')


def parse_mt_args(data: str) -> Generator[Tuple[str, str], None, None]:
    """
    Parse the "parameters" part of a media-type.

    The parameters are returned as a generator of (key, value) tuples, in the
    order in which they appear. Values given as quoted strings are unquoted,
    unquoted values are stripped of surrounding whitespace. Names which are
    not followed by ``=`` are ignored.

    :param data: The raw parameter string, for example everything following
        the first ``;`` of a media-type.
    :return: A generator over ``(key, value)`` tuples.

    >>> list(parse_mt_args('foo=bar; frob="hello world"'))
    [('foo', 'bar'), ('frob', 'hello world')]
    >>> list(parse_mt_args('foo=bar;frob=baz'))
    [('foo', 'bar'), ('frob', 'baz')]
    """
    state = 'keyname'
    current_key_name = ''
    last_cut = 0
    escaped = False  # True if the previous quoted character was a backslash
    for idx, char in enumerate(data):
        if state == 'keyname':
            if char in ' \t\r\n;':
                # Whitespace and separators never belong to a key name.
                last_cut = idx + 1
            elif char == '=':
                current_key_name = data[last_cut:idx]
                last_cut = idx + 1
                state = 'value'
        elif state == 'value':
            if char == ';':
                yield current_key_name, data[last_cut:idx].strip()
                # Resume directly after the separator. Optional whitespace
                # before the next key is skipped by the "keyname" state, so
                # no assumption about a following space is needed.
                last_cut = idx + 1
                state = 'keyname'
            elif char == '"':
                last_cut = idx + 1
                escaped = False
                state = 'quoted_value'
        else:  # state == 'quoted_value'
            if escaped:
                # This character is the second half of a quoted-pair.
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                yield current_key_name, unquote_arg_value(data[last_cut:idx])
                last_cut = idx + 1
                state = 'keyname'
    if last_cut < len(data):
        if state == 'value':
            yield current_key_name, data[last_cut:].strip()
        elif state == 'quoted_value':
            # Unterminated quoted string: return the remainder as-is.
            yield current_key_name, data[last_cut:]
        # In state "keyname" the remainder is a name without "=" and there
        # is no value to pair it with, so it is dropped.


def split_accept_types(data: str) -> Generator[str, None, None]:
    """
    Split an "Accept" header value into its individual media-type strings.

    The value is split on every comma which is not part of a quoted string.
    The "q" parameter receives no special handling. Each item is stripped of
    surrounding whitespace and empty list elements are skipped.

    :param data: The raw value of the "Accept" header.
    :return: A generator over the media-type strings in header order.

    >>> list(split_accept_types('text/plain; charset=utf8, image/jpeg; q=0.5'))
    ['text/plain; charset=utf8', 'image/jpeg; q=0.5']
    >>> list(split_accept_types('a/b; x="1,2", , c/d'))
    ['a/b; x="1,2"', 'c/d']
    """
    in_quotes = False
    escaped = False  # True if the previous quoted character was a backslash
    last_cut = 0
    for idx, char in enumerate(data):
        if in_quotes:
            if escaped:
                # Second half of a quoted-pair; never ends the string.
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_quotes = False
        elif char == '"':
            in_quotes = True
        elif char == ',':
            item = data[last_cut:idx].strip()
            if item:
                yield item
            last_cut = idx + 1

    item = data[last_cut:].strip()
    if item:
        yield item


def unquote_arg_value(value: str) -> str:
    r"""
    Unquote an argument from an RFC-7230 header parameter value.

    Every quoted-pair (a backslash followed by any character) is replaced by
    the character following the backslash. A lone trailing backslash is kept
    unchanged.

    :param value: The content of a quoted string, without the surrounding
        double quotes.
    :return: The value with all backslash escapes resolved.

    >>> unquote_arg_value('Hello \\"World\\"')
    'Hello "World"'
    >>> unquote_arg_value('C:\\\\temp')
    'C:\\temp'
    """
    if '\\' not in value:
        return value
    # DOTALL so that an escaped newline is handled like any other character.
    return re.sub(r'\\(.)', r'\1', value, flags=re.DOTALL)


def quote_arg_value(value: str) -> str:
    r"""
    Quote a value for use as an RFC-7230 header parameter value.

    The value is wrapped in double quotes if it is empty or contains
    whitespace, a double quote, a backslash, ``;`` or ``,``. Inside the quotes,
    backslashes and double quotes are escaped with a backslash. Any other
    value is returned unchanged.

    :param value: The plain (unquoted) parameter value.
    :return: The value in a form which can be placed after ``key=``.

    >>> quote_arg_value('Hello "World"')
    '"Hello \\"World\\""'
    >>> quote_arg_value('utf8')
    'utf8'
    """
    needs_quoting = not value or any(
        char.isspace() or char in '"\\;,' for char in value
    )
    if not needs_quoting:
        return value
    # Backslashes must be escaped first, otherwise the backslashes added for
    # the double quotes would be escaped a second time.
    escaped = value.replace('\\', '\\\\').replace('"', '\\"')
    return '"%s"' % escaped


def parse_accept(data: str) -> Generator[AcceptType, None, None]:
    """
    Parse the value of an "Accept" header.

    Returns a generator over (media-type, quality) tuples. The results are
    *not* sorted. This can be achieved using :py:func:`~.sort_accept`.

    The "q" parameter is removed from the media-type and returned as quality
    (``1.0`` if it is missing). Parameters preceding "q" are kept as part of
    the media-type, parameters following it are discarded. Empty list elements
    are skipped.

    :param data: The raw value of the "Accept" header.
    :return: A generator over :py:class:`AcceptType` instances in header
        order.
    :raises ValueError: If a "q" value cannot be converted to a float.

    >>> list(parse_accept('text/plain; charset=utf8; q=1, image/jpeg; q=0.5'))
    ... # doctest: +NORMALIZE_WHITESPACE
    [AcceptType(media_type='text/plain; charset=utf8', quality=1.0),
     AcceptType(media_type='image/jpeg', quality=0.5)]
    """
    for mt in split_accept_types(data):
        if not mt:
            # Empty list elements (e.g. from ", ,") carry no media-type.
            continue
        quality = 1.0
        mt_out, _, args_raw = mt.partition(';')
        # Drop optional whitespace between the media-type and the first ";".
        mt_out = mt_out.strip()
        mt_args = []
        for key, value in parse_mt_args(args_raw):
            # Exact, case-insensitive comparison. A substring test against
            # 'qQ' would also match an empty key.
            if key.lower() == 'q':
                quality = float(value)
                break
            mt_args.append((key, value))
        if mt_args:
            str_args = ['%s=%s' % (k, quote_arg_value(v)) for k, v in mt_args]
            mt_out = '%s; %s' % (mt_out, '; '.join(str_args))
        yield AcceptType(mt_out, quality)


def sort_accept(accept_types: Iterable[AcceptType]) -> List[AcceptType]:
    """
    Return the given AcceptType instances as a new, sorted list.

    Entries are ordered by descending quality. Entries of equal quality are
    ordered by descending character-length of their media-type; entries which
    are equal in both respects keep their relative input order.

    The length is a heuristic: as per RFC-7231 section 5.3.2, the precedence
    of multiple "accept" types relies on the specificity of the media-type,
    which this function does not fully implement.

    >>> sort_accept([AcceptType('foo/bar', 0.3), AcceptType('bar/baz', 0.5)])
    ... # doctest: +NORMALIZE_WHITESPACE
    [AcceptType(media_type='bar/baz', quality=0.5),
     AcceptType(media_type='foo/bar', quality=0.3)]
    >>> sort_accept([AcceptType('foo/bar', 0.5), AcceptType('bar/frob', 0.5)])
    ... # doctest: +NORMALIZE_WHITESPACE
    [AcceptType(media_type='bar/frob', quality=0.5),
     AcceptType(media_type='foo/bar', quality=0.5)]
    """
    ranked = list(accept_types)
    # Both components are negated so that the ascending sort puts the highest
    # quality and the longest media-type first.
    ranked.sort(key=lambda entry: (-entry.quality, -len(entry.media_type)))
    return ranked
	"""
	This module contains helpers for parsing and sorting the value of the HTTP
	"Accept" request header.
	"""
	import re
	from typing import Generator, Iterable, List, NamedTuple, Tuple

	# Record type produced by the helpers in this module: a media-type string
	# paired with its quality ("q") weight.
	AcceptType = NamedTuple('AcceptType', [
	('media_type', str),
	('quality', float),
	])

	# Matches a double quote or any whitespace character, i.e. characters that
	# cannot appear in an unquoted parameter value.
	P_QUOTE_NEEDED = re.compile(r'["\s]')


	def parse_mt_args(data: str) -> Generator[Tuple[str, str], None, None]:
		"""
		Parse the "parameters" part of a media-type.

		The parameters are returned as a generator of (key, value) tuples, in
		the order in which they appear. Values given as quoted strings are
		unquoted, unquoted values are stripped of surrounding whitespace.
		Names which are not followed by ``=`` are ignored.

		:param data: The raw parameter string, for example everything
			following the first ``;`` of a media-type.
		:return: A generator over ``(key, value)`` tuples.

		>>> list(parse_mt_args('foo=bar; frob="hello world"'))
		[('foo', 'bar'), ('frob', 'hello world')]
		>>> list(parse_mt_args('foo=bar;frob=baz'))
		[('foo', 'bar'), ('frob', 'baz')]
		"""
		state = 'keyname'
		current_key_name = ''
		last_cut = 0
		escaped = False  # True if the previous quoted character was a backslash
		for idx, char in enumerate(data):
			if state == 'keyname':
				if char in ' \t\r\n;':
					# Whitespace and separators never belong to a key name.
					last_cut = idx + 1
				elif char == '=':
					current_key_name = data[last_cut:idx]
					last_cut = idx + 1
					state = 'value'
			elif state == 'value':
				if char == ';':
					yield current_key_name, data[last_cut:idx].strip()
					# Resume directly after the separator. Optional whitespace
					# before the next key is skipped by the "keyname" state.
					last_cut = idx + 1
					state = 'keyname'
				elif char == '"':
					last_cut = idx + 1
					escaped = False
					state = 'quoted_value'
			else:  # state == 'quoted_value'
				if escaped:
					# This character is the second half of a quoted-pair.
					escaped = False
				elif char == '\\':
					escaped = True
				elif char == '"':
					yield current_key_name, unquote_arg_value(data[last_cut:idx])
					last_cut = idx + 1
					state = 'keyname'
		if last_cut < len(data):
			if state == 'value':
				yield current_key_name, data[last_cut:].strip()
			elif state == 'quoted_value':
				# Unterminated quoted string: return the remainder as-is.
				yield current_key_name, data[last_cut:]
			# In state "keyname" the remainder is a name without "=" and there
			# is no value to pair it with, so it is dropped.


	def split_accept_types(data: str) -> Generator[str, None, None]:
		"""
		Split an "Accept" header value into its individual media-type strings.

		The value is split on every comma which is not part of a quoted
		string. The "q" parameter receives no special handling. Each item is
		stripped of surrounding whitespace and empty list elements are skipped.

		:param data: The raw value of the "Accept" header.
		:return: A generator over the media-type strings in header order.

		>>> list(split_accept_types('text/plain; charset=utf8, image/jpeg; q=0.5'))
		['text/plain; charset=utf8', 'image/jpeg; q=0.5']
		>>> list(split_accept_types('a/b; x="1,2", , c/d'))
		['a/b; x="1,2"', 'c/d']
		"""
		in_quotes = False
		escaped = False  # True if the previous quoted character was a backslash
		last_cut = 0
		for idx, char in enumerate(data):
			if in_quotes:
				if escaped:
					# Second half of a quoted-pair; never ends the string.
					escaped = False
				elif char == '\\':
					escaped = True
				elif char == '"':
					in_quotes = False
			elif char == '"':
				in_quotes = True
			elif char == ',':
				item = data[last_cut:idx].strip()
				if item:
					yield item
				last_cut = idx + 1

		item = data[last_cut:].strip()
		if item:
			yield item


	def unquote_arg_value(value: str) -> str:
		r"""
		Unquote an argument from an RFC-7230 header parameter value.

		Every quoted-pair (a backslash followed by any character) is replaced
		by the character following the backslash. A lone trailing backslash is
		kept unchanged.

		:param value: The content of a quoted string, without the surrounding
			double quotes.
		:return: The value with all backslash escapes resolved.

		>>> unquote_arg_value('Hello \\"World\\"')
		'Hello "World"'
		>>> unquote_arg_value('C:\\\\temp')
		'C:\\temp'
		"""
		if '\\' not in value:
			return value
		# DOTALL so that an escaped newline is handled like any other character.
		return re.sub(r'\\(.)', r'\1', value, flags=re.DOTALL)


	def quote_arg_value(value: str) -> str:
		r"""
		Quote a value for use as an RFC-7230 header parameter value.

		The value is wrapped in double quotes if it is empty or contains
		whitespace, a double quote, a backslash, ``;`` or ``,``. Inside the
		quotes, backslashes and double quotes are escaped with a backslash.
		Any other value is returned unchanged.

		:param value: The plain (unquoted) parameter value.
		:return: The value in a form which can be placed after ``key=``.

		>>> quote_arg_value('Hello "World"')
		'"Hello \\"World\\""'
		>>> quote_arg_value('utf8')
		'utf8'
		"""
		needs_quoting = not value or any(
			char.isspace() or char in '"\\;,' for char in value
		)
		if not needs_quoting:
			return value
		# Backslashes must be escaped first, otherwise the backslashes added
		# for the double quotes would be escaped a second time.
		escaped = value.replace('\\', '\\\\').replace('"', '\\"')
		return '"%s"' % escaped


	def parse_accept(data: str) -> Generator[AcceptType, None, None]:
		"""
		Parse the value of an "Accept" header.

		Returns a generator over (media-type, quality) tuples. The results are
		*not* sorted. This can be achieved using :py:func:`~.sort_accept`.

		The "q" parameter is removed from the media-type and returned as
		quality (``1.0`` if it is missing). Parameters preceding "q" are kept
		as part of the media-type, parameters following it are discarded.
		Empty list elements are skipped.

		:param data: The raw value of the "Accept" header.
		:return: A generator over :py:class:`AcceptType` instances in header
			order.
		:raises ValueError: If a "q" value cannot be converted to a float.

		>>> list(parse_accept('text/plain; charset=utf8; q=1, image/jpeg; q=0.5'))
		... # doctest: +NORMALIZE_WHITESPACE
		[AcceptType(media_type='text/plain; charset=utf8', quality=1.0),
		 AcceptType(media_type='image/jpeg', quality=0.5)]
		"""
		for mt in split_accept_types(data):
			if not mt:
				# Empty list elements (e.g. from ", ,") carry no media-type.
				continue
			quality = 1.0
			mt_out, _, args_raw = mt.partition(';')
			# Drop optional whitespace between the media-type and the first ";".
			mt_out = mt_out.strip()
			mt_args = []
			for key, value in parse_mt_args(args_raw):
				# Exact, case-insensitive comparison. A substring test against
				# 'qQ' would also match an empty key.
				if key.lower() == 'q':
					quality = float(value)
					break
				mt_args.append((key, value))
			if mt_args:
				str_args = ['%s=%s' % (k, quote_arg_value(v)) for k, v in mt_args]
				mt_out = '%s; %s' % (mt_out, '; '.join(str_args))
			yield AcceptType(mt_out, quality)


	def sort_accept(accept_types: Iterable[AcceptType]) -> List[AcceptType]:
		"""
		Return the given AcceptType instances as a new, sorted list.

		Entries are ordered by descending quality. Entries of equal quality
		are ordered by descending character-length of their media-type;
		entries which are equal in both respects keep their relative input
		order.

		The length is a heuristic: as per RFC-7231 section 5.3.2, the
		precedence of multiple "accept" types relies on the specificity of the
		media-type, which this function does not fully implement.

		>>> sort_accept([AcceptType('foo/bar', 0.3), AcceptType('bar/baz', 0.5)])
		... # doctest: +NORMALIZE_WHITESPACE
		[AcceptType(media_type='bar/baz', quality=0.5),
		 AcceptType(media_type='foo/bar', quality=0.3)]
		>>> sort_accept([AcceptType('foo/bar', 0.5), AcceptType('bar/frob', 0.5)])
		... # doctest: +NORMALIZE_WHITESPACE
		[AcceptType(media_type='bar/frob', quality=0.5),
		 AcceptType(media_type='foo/bar', quality=0.5)]
		"""
		def sort_key(value: AcceptType) -> Tuple[float, int]:
			# Both components are negated so that the ascending sort puts the
			# highest quality and the longest media-type first.
			return (-value.quality, -len(value.media_type))
		return sorted(accept_types, key=sort_key)