PetrGlad/audatic.py

## audatic.py
import statistics
import ast
import math
import json
from typing import Callable


class ParseError(Exception):
  """Raised when the input text does not follow the expected syntax."""


# Brute-force version :)
# Works as expected on a valid input but ignores some syntax errors.
def string_flattener_1(values_str, empty_list_value):
  """Flatten a bracketed list literal using plain string operations.

  The text is split on commas and the brackets wrapping every item are
  stripped. Nesting is only checked loosely (the numbers of '[' and ']'
  must match), not structurally.

  Args:
    values_str: Text such as "[1,[2],[]]".
    empty_list_value: Value yielded in place of every empty list "[]".

  Yields:
    Every number as a float (math.nan for "nan") and empty_list_value for
    every empty list, in order of appearance.

  Raises:
    ParseError: If the text is not wrapped in "[...]", the bracket counts
      differ, or an item is not a valid number. This is a generator, so
      the error surfaces when the result is iterated.
  """
  # This parser is fundamentally sloppy; catch some input inconsistencies explicitly.
  if not values_str.startswith('[') or not values_str.endswith(']'):
    raise ParseError("Top level value is not a list.")
  if values_str.count('[') != values_str.count(']'):
    raise ParseError("List brackets are not balanced.")

  for item in values_str.split(','):
    # Strip only the brackets that wrap the item. A bracket left inside the
    # item (as in "1[]") then fails float() instead of being glued to a digit.
    x_str = item.lstrip('[').rstrip(']')
    if not x_str and '[]' in item:
      yield empty_list_value
    elif x_str.lower() == 'nan':
      yield math.nan
    else:
      try:
        value = float(x_str)
      except ValueError as err:
        raise ParseError(f"Numeric value is malformed: {x_str}") from err
      yield value


# Version of _1 with bracket nesting checks.
# Single loop procedure with depth tracking (alternatively it can be recursive).
def parser_flattener_2(values_str, empty_list_value):
  """Flatten a bracketed list literal with a single-pass state machine.

  `state` holds the kind of the previous token ('start', '[', ']', ',' or
  'value') and `depth` the current bracket nesting, so misplaced brackets
  and commas are rejected.

  Args:
    values_str: Text such as "[1,[2],[]]".
    empty_list_value: Value yielded in place of every empty list "[]".

  Yields:
    Every number as a float and empty_list_value for every empty list, in
    order of appearance.

  Raises:
    ParseError: On empty input, misplaced brackets or commas, characters
      that cannot be part of a number, any text after the top level list
      is closed, unclosed brackets, or a malformed number. This is a
      generator, so the error surfaces when the result is iterated.
  """
  depth = 0
  value_start = None
  state = 'start'

  def eval_x(value_end):
    # Convert the value that spans values_str[value_start:value_end].
    x_str = values_str[value_start:value_end]
    try:
      return float(x_str)
    except ValueError as err:
      raise ParseError(f"Numeric value is malformed: {x_str}") from err

  for i, ch in enumerate(values_str):
    if depth == 0 and state != 'start':
      # The top level list is already closed; nothing may follow it.
      if ch == ']':
        raise ParseError("Unmatched closing bracket.")
      raise ParseError(f"Unexpected '{ch}' at [{i}]")
    if ch == '[':
      if state not in (',', '[', 'start'):
        raise ParseError(f"Unexpected start of the list at [{i}]")
      state = '['
      depth += 1
    elif ch == ']':
      if state == 'value':
        yield eval_x(i)
      elif state == '[':
        yield empty_list_value
      elif state != ']':
        raise ParseError(f"Unexpected end of the list at [{i}]")
      state = ']'
      depth -= 1
    elif ch == ',':
      if state == 'value':
        yield eval_x(i)
      elif state != ']':
        raise ParseError(f"Unexpected ',' at [{i}]")
      state = ','
    elif ch.isalnum() or ch in '.-+':  # Letters are allowed so that "nan" is accepted.
      if state in (',', '['):
        state = 'value'
        value_start = i
      elif state != 'value':
        raise ParseError(f"Unexpected '{ch}' at [{i}]")
    else:
      # Any other character (whitespace, punctuation) is an error rather than being skipped.
      raise ParseError(f"Unexpected '{ch}' at [{i}]")
  if state == 'start':
    raise ParseError("Top level value is not a list.")
  if depth > 0:
    raise ParseError(f"Not enough closing list brackets ({depth} more expected).")


# Using 0 as "[]" value is inconsistent, but it was one of the task requirements.
def flatten(xs, empty_list_value):
  """Recursively yield the leaf values of an already parsed nested list.

  Args:
    xs: A number, the string 'nan', or a (possibly nested) list of those.
    empty_list_value: Value yielded in place of every empty list.

  Yields:
    ints and floats unchanged, math.nan for 'nan' and empty_list_value for
    every empty list, depth first and in order.

  Raises:
    ParseError: If a value of any other type is encountered.
  """
  kind = type(xs)
  if kind is list:
    if not xs:
      yield empty_list_value
    for item in xs:
      yield from flatten(item, empty_list_value)
  elif kind is int or kind is float:
    # Exact type match on purpose: bool is a subclass of int and must be rejected.
    yield xs
  elif xs == 'nan':
    yield math.nan
  else:
    raise ParseError(f"Unexpected list value {xs}")


# Using Python eval.
# Slow, but speed was not a requirement. It turns out that ast.literal_eval is safe; I missed that while googling.
# This may still fail when given a deeply nested string.
def python_flattener_3(values_str, empty_list_value):
  """Flatten a list literal by parsing it with ast.literal_eval.

  The text is lower-cased and every "nan" is wrapped in double quotes
  before parsing, so it evaluates to the string 'nan'.

  Args:
    values_str: Text such as "[1,[2],[]]".
    empty_list_value: Value yielded in place of every empty list.

  Yields:
    Whatever flatten() yields for the parsed list.

  Raises:
    ParseError: If the text cannot be evaluated as a literal, the top
      level value is not a list, or flatten() rejects an element. This is
      a generator, so the error surfaces when the result is iterated.
  """
  try:
    xs = ast.literal_eval(values_str.lower().replace('nan', '"nan"'))
  except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError) as e:
    # literal_eval may report malformed input with any of these exception types.
    raise ParseError(e) from e
  if not isinstance(xs, list):
    raise ParseError("Top level value is not a list.")
  yield from flatten(xs, empty_list_value)


# Using JSON parse.
# This one also may be unsafe on some inputs.
def json_flattener_4(values_str, empty_list_value):
  """Flatten a list literal by parsing it as JSON.

  The text is lower-cased and every "nan" is wrapped in double quotes
  before parsing, so it is decoded as the string 'nan'.

  Args:
    values_str: Text such as "[1,[2],[]]".
    empty_list_value: Value yielded in place of every empty list.

  Yields:
    Whatever flatten() yields for the decoded list.

  Raises:
    ParseError: If the text is not valid JSON, the top level value is not
      a list, or flatten() rejects an element. This is a generator, so the
      error surfaces when the result is iterated.
  """
  try:
    xs = json.loads(values_str.lower().replace('nan', '"nan"'))
  except (ValueError, RecursionError) as e:
    # json.JSONDecodeError is a ValueError subclass; deeply nested input
    # can additionally exhaust the recursion limit inside the decoder.
    raise ParseError(e) from e
  if not isinstance(xs, list):
    raise ParseError("Top level value is not a list.")
  yield from flatten(xs, empty_list_value)


# This function shouldn't be renamed, so it can be imported in the tests
#
# Note `flattener` parameter was not part of the required API in the task.
# Including it here to test different list parsers.
def parse_compute_averages(input_arguments: str,
                           flattener: Callable[[str, float], list[float]]) -> str:
  """Compute the average of every `key=[...]` item of a space separated string.

  Example: "a=[1,2] b=[[]]" gives "a=1.50 b=0.00".

  Args:
    input_arguments: Items of the form `key=values`, separated by single
      spaces. Keys must be alphanumeric.
    flattener: Called as flattener(values, 0.0); must return an iterable
      (a generator is fine) with the numbers found in `values`, where 0.0
      is the value to use for an empty list.

  Returns:
    The items in input order as `key=average`, joined by single spaces,
    with every average formatted with two decimals.

  Raises:
    ParseError: If an item does not contain exactly one '=', a key is not
      alphanumeric, the flattener rejects the values, or it yields no
      values at all.
  """
  output = []
  for item in input_arguments.split(' '):
    pair = item.split('=')
    if len(pair) != 2:
      raise ParseError(f"key-value pair around '=' is malformed: {pair}")
    key, values_str = pair
    if not key.isalnum():
      raise ParseError(f"Key name is invalid: {key}")
    # Materialise the values so that an empty result is reported as a
    # ParseError instead of a StatisticsError from statistics.mean().
    values = list(flattener(values_str, 0.0))
    if not values:
      raise ParseError(f"No values found for key: {key}")
    average = round(statistics.mean(values), 2)
    output.append(f'{key}={average:.2f}')
  return ' '.join(output)


# Valid inputs: every implementation must print the same averages for these.
examples = ["a=[] b=[2] c=[[]] d=[[4]] e=[[1],2] e=[2,[1]] d=[1,[2],3] d=[1,[[2]],[3]]",
            "g=[-1] h=[2.0] i=[-2.0] j=[-1.003,345.65432] k=[[-1],-2] l=[-2,[-1]] m=[-1,[-2],-3] n=[-1,[[-2]],[-3]]",
            "as=[12,2,[-3.4],[-12,12.00,[13000,8]],99] bz=[23,nan] bz=[23]"]
# Malformed inputs: every implementation must raise ParseError for these.
parse_error_examples = ['', '-2', '[]', '[23]', '=[3]', 'eh=45', 'a=[12,,3]',
                        'x=[,]', 'r=[2, 3]', 'z=[2]u=[3]', 'n=[nanan]',
                        'skew=[2[,]]', 'skew=[3,4', 'skew=[3,[4]']
implementations = [string_flattener_1,
                   parser_flattener_2,
                   python_flattener_3,
                   json_flattener_4,
                   ]
# For each implementation the output should be identical.
for impl in implementations:
  print(f"\nImpl {impl.__name__} :")
  for valid_input in examples:
    print(parse_compute_averages(valid_input, impl))
  for bad_input in parse_error_examples:
    try:
      parse_compute_averages(bad_input, impl)
    except ParseError:
      continue
    # Raised explicitly: an `assert` statement is skipped when Python runs with -O.
    raise AssertionError(f"A ParseError exception is expected on '{bad_input}'.")
	import statistics
	import ast
	import math
	import json
	from typing import Callable


	class ParseError(Exception):
	  """Raised when the input text does not follow the expected syntax."""


	# Brute-force version :)
	# Works as expected on a valid input but ignores some syntax errors.
	def string_flattener_1(values_str, empty_list_value):
	  """Flatten a bracketed list literal using plain string operations.

	  The text is split on commas and the brackets wrapping every item are
	  stripped. Nesting is only checked loosely (the numbers of '[' and ']'
	  must match), not structurally.

	  Args:
	    values_str: Text such as "[1,[2],[]]".
	    empty_list_value: Value yielded in place of every empty list "[]".

	  Yields:
	    Every number as a float (math.nan for "nan") and empty_list_value for
	    every empty list, in order of appearance.

	  Raises:
	    ParseError: If the text is not wrapped in "[...]", the bracket counts
	      differ, or an item is not a valid number. This is a generator, so
	      the error surfaces when the result is iterated.
	  """
	  # This parser is fundamentally sloppy; catch some input inconsistencies explicitly.
	  if not values_str.startswith('[') or not values_str.endswith(']'):
	    raise ParseError("Top level value is not a list.")
	  if values_str.count('[') != values_str.count(']'):
	    raise ParseError("List brackets are not balanced.")

	  for item in values_str.split(','):
	    # Strip only the brackets that wrap the item. A bracket left inside the
	    # item (as in "1[]") then fails float() instead of being glued to a digit.
	    x_str = item.lstrip('[').rstrip(']')
	    if not x_str and '[]' in item:
	      yield empty_list_value
	    elif x_str.lower() == 'nan':
	      yield math.nan
	    else:
	      try:
	        value = float(x_str)
	      except ValueError as err:
	        raise ParseError(f"Numeric value is malformed: {x_str}") from err
	      yield value


	# Version of _1 with bracket nesting checks.
	# Single loop procedure with depth tracking (alternatively it can be recursive).
	def parser_flattener_2(values_str, empty_list_value):
	  """Flatten a bracketed list literal with a single-pass state machine.

	  `state` holds the kind of the previous token ('start', '[', ']', ',' or
	  'value') and `depth` the current bracket nesting, so misplaced brackets
	  and commas are rejected.

	  Args:
	    values_str: Text such as "[1,[2],[]]".
	    empty_list_value: Value yielded in place of every empty list "[]".

	  Yields:
	    Every number as a float and empty_list_value for every empty list, in
	    order of appearance.

	  Raises:
	    ParseError: On empty input, misplaced brackets or commas, characters
	      that cannot be part of a number, any text after the top level list
	      is closed, unclosed brackets, or a malformed number. This is a
	      generator, so the error surfaces when the result is iterated.
	  """
	  depth = 0
	  value_start = None
	  state = 'start'

	  def eval_x(value_end):
	    # Convert the value that spans values_str[value_start:value_end].
	    x_str = values_str[value_start:value_end]
	    try:
	      return float(x_str)
	    except ValueError as err:
	      raise ParseError(f"Numeric value is malformed: {x_str}") from err

	  for i, ch in enumerate(values_str):
	    if depth == 0 and state != 'start':
	      # The top level list is already closed; nothing may follow it.
	      if ch == ']':
	        raise ParseError("Unmatched closing bracket.")
	      raise ParseError(f"Unexpected '{ch}' at [{i}]")
	    if ch == '[':
	      if state not in (',', '[', 'start'):
	        raise ParseError(f"Unexpected start of the list at [{i}]")
	      state = '['
	      depth += 1
	    elif ch == ']':
	      if state == 'value':
	        yield eval_x(i)
	      elif state == '[':
	        yield empty_list_value
	      elif state != ']':
	        raise ParseError(f"Unexpected end of the list at [{i}]")
	      state = ']'
	      depth -= 1
	    elif ch == ',':
	      if state == 'value':
	        yield eval_x(i)
	      elif state != ']':
	        raise ParseError(f"Unexpected ',' at [{i}]")
	      state = ','
	    elif ch.isalnum() or ch in '.-+':  # Letters are allowed so that "nan" is accepted.
	      if state in (',', '['):
	        state = 'value'
	        value_start = i
	      elif state != 'value':
	        raise ParseError(f"Unexpected '{ch}' at [{i}]")
	    else:
	      # Any other character (whitespace, punctuation) is an error rather than being skipped.
	      raise ParseError(f"Unexpected '{ch}' at [{i}]")
	  if state == 'start':
	    raise ParseError("Top level value is not a list.")
	  if depth > 0:
	    raise ParseError(f"Not enough closing list brackets ({depth} more expected).")


	# Using 0 as "[]" value is inconsistent, but it was one of the task requirements.
	def flatten(xs, empty_list_value):
	  """Recursively yield the leaf values of an already parsed nested list.

	  Args:
	    xs: A number, the string 'nan', or a (possibly nested) list of those.
	    empty_list_value: Value yielded in place of every empty list.

	  Yields:
	    ints and floats unchanged, math.nan for 'nan' and empty_list_value for
	    every empty list, depth first and in order.

	  Raises:
	    ParseError: If a value of any other type is encountered.
	  """
	  kind = type(xs)
	  if kind is list:
	    if not xs:
	      yield empty_list_value
	    for item in xs:
	      yield from flatten(item, empty_list_value)
	  elif kind is int or kind is float:
	    # Exact type match on purpose: bool is a subclass of int and must be rejected.
	    yield xs
	  elif xs == 'nan':
	    yield math.nan
	  else:
	    raise ParseError(f"Unexpected list value {xs}")


	# Using Python eval.
	# Slow, but speed was not a requirement. It turns out that ast.literal_eval is safe; I missed that while googling.
	# This may still fail when given a deeply nested string.
	def python_flattener_3(values_str, empty_list_value):
	  """Flatten a list literal by parsing it with ast.literal_eval.

	  The text is lower-cased and every "nan" is wrapped in double quotes
	  before parsing, so it evaluates to the string 'nan'.

	  Args:
	    values_str: Text such as "[1,[2],[]]".
	    empty_list_value: Value yielded in place of every empty list.

	  Yields:
	    Whatever flatten() yields for the parsed list.

	  Raises:
	    ParseError: If the text cannot be evaluated as a literal, the top
	      level value is not a list, or flatten() rejects an element. This is
	      a generator, so the error surfaces when the result is iterated.
	  """
	  try:
	    xs = ast.literal_eval(values_str.lower().replace('nan', '"nan"'))
	  except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError) as e:
	    # literal_eval may report malformed input with any of these exception types.
	    raise ParseError(e) from e
	  if not isinstance(xs, list):
	    raise ParseError("Top level value is not a list.")
	  yield from flatten(xs, empty_list_value)


	# Using JSON parse.
	# This one also may be unsafe on some inputs.
	def json_flattener_4(values_str, empty_list_value):
	  """Flatten a list literal by parsing it as JSON.

	  The text is lower-cased and every "nan" is wrapped in double quotes
	  before parsing, so it is decoded as the string 'nan'.

	  Args:
	    values_str: Text such as "[1,[2],[]]".
	    empty_list_value: Value yielded in place of every empty list.

	  Yields:
	    Whatever flatten() yields for the decoded list.

	  Raises:
	    ParseError: If the text is not valid JSON, the top level value is not
	      a list, or flatten() rejects an element. This is a generator, so the
	      error surfaces when the result is iterated.
	  """
	  try:
	    xs = json.loads(values_str.lower().replace('nan', '"nan"'))
	  except (ValueError, RecursionError) as e:
	    # json.JSONDecodeError is a ValueError subclass; deeply nested input
	    # can additionally exhaust the recursion limit inside the decoder.
	    raise ParseError(e) from e
	  if not isinstance(xs, list):
	    raise ParseError("Top level value is not a list.")
	  yield from flatten(xs, empty_list_value)


	# This function shouldn't be renamed, so it can be imported in the tests
	#
	# Note `flattener` parameter was not part of the required API in the task.
	# Including it here to test different list parsers.
	def parse_compute_averages(input_arguments: str,
	                           flattener: Callable[[str, float], list[float]]) -> str:
	  """Compute the average of every `key=[...]` item of a space separated string.

	  Example: "a=[1,2] b=[[]]" gives "a=1.50 b=0.00".

	  Args:
	    input_arguments: Items of the form `key=values`, separated by single
	      spaces. Keys must be alphanumeric.
	    flattener: Called as flattener(values, 0.0); must return an iterable
	      (a generator is fine) with the numbers found in `values`, where 0.0
	      is the value to use for an empty list.

	  Returns:
	    The items in input order as `key=average`, joined by single spaces,
	    with every average formatted with two decimals.

	  Raises:
	    ParseError: If an item does not contain exactly one '=', a key is not
	      alphanumeric, the flattener rejects the values, or it yields no
	      values at all.
	  """
	  output = []
	  for item in input_arguments.split(' '):
	    pair = item.split('=')
	    if len(pair) != 2:
	      raise ParseError(f"key-value pair around '=' is malformed: {pair}")
	    key, values_str = pair
	    if not key.isalnum():
	      raise ParseError(f"Key name is invalid: {key}")
	    # Materialise the values so that an empty result is reported as a
	    # ParseError instead of a StatisticsError from statistics.mean().
	    values = list(flattener(values_str, 0.0))
	    if not values:
	      raise ParseError(f"No values found for key: {key}")
	    average = round(statistics.mean(values), 2)
	    output.append(f'{key}={average:.2f}')
	  return ' '.join(output)


	# Valid inputs: every implementation must print the same averages for these.
	examples = ["a=[] b=[2] c=[[]] d=[[4]] e=[[1],2] e=[2,[1]] d=[1,[2],3] d=[1,[[2]],[3]]",
	            "g=[-1] h=[2.0] i=[-2.0] j=[-1.003,345.65432] k=[[-1],-2] l=[-2,[-1]] m=[-1,[-2],-3] n=[-1,[[-2]],[-3]]",
	            "as=[12,2,[-3.4],[-12,12.00,[13000,8]],99] bz=[23,nan] bz=[23]"]
	# Malformed inputs: every implementation must raise ParseError for these.
	parse_error_examples = ['', '-2', '[]', '[23]', '=[3]', 'eh=45', 'a=[12,,3]',
	                        'x=[,]', 'r=[2, 3]', 'z=[2]u=[3]', 'n=[nanan]',
	                        'skew=[2[,]]', 'skew=[3,4', 'skew=[3,[4]']
	implementations = [string_flattener_1,
	                   parser_flattener_2,
	                   python_flattener_3,
	                   json_flattener_4,
	                   ]
	# For each implementation the output should be identical.
	for impl in implementations:
	  print(f"\nImpl {impl.__name__} :")
	  for valid_input in examples:
	    print(parse_compute_averages(valid_input, impl))
	  for bad_input in parse_error_examples:
	    try:
	      parse_compute_averages(bad_input, impl)
	    except ParseError:
	      continue
	    # Raised explicitly: an `assert` statement is skipped when Python runs with -O.
	    raise AssertionError(f"A ParseError exception is expected on '{bad_input}'.")