# nathanalderson/table_parser.py

## table_parser.py
from typing import List, Tuple

def parse_table(table: str) -> Tuple[Tuple, List[Tuple]]:
    """
    Split a fixed-width text table into its headers and its records.

    The second line of ``table`` is handed to ``_get_spans`` to work out where
    each column starts and ends. Those spans are then used to slice the first
    line (the headers) and every line from the third onward (one record per
    line). See the test case at the bottom of the file.

    Returns a ``(headers, records)`` pair: ``headers`` is a tuple of strings and
    ``records`` is a list of such tuples.
    """
    lines = table.splitlines()
    column_spans = _get_spans(lines[1])
    header_cells = _span_vals(lines[0], column_spans)
    record_cells = []
    for line in lines[2:]:
        record_cells.append(_span_vals(line, column_spans))
    return header_cells, record_cells

def _span_vals(row, spans):
    return tuple(row[start:end].strip() for start, end in spans)

def _get_spans(row):
    """
    Return the ``(start, end)`` index pairs of the non-space runs in ``row``.

    The row is walked one run of identical characters at a time, using
    ``_find_next_not`` to locate where the current run stops. Runs of spaces
    are skipped; every other run is recorded as a span.
    """
    found = []
    pos, current = 0, ' '
    # ``current`` becomes None once _find_next_not runs off the end of the row.
    while current is not None:
        run_end, following = _find_next_not(current, row, pos)
        if current != ' ':
            found.append((pos, run_end))
        pos, current = run_end, following
    return found

def _find_next_not(c, s, i):
    while True:
        try:
            if s[i] != c:
                return i, s[i]
            i += 1
        except IndexError:
            return i, None

#######
# Tests
#######

# Fixture for test_parse_table. The dashed second line defines three columns.
# The data rows cover, in order: left-aligned values, filler characters ("XX")
# sitting in the gaps between columns, right-aligned values, and a row whose
# middle column is blank.
table1 = """\
col1  c2    column3
----  ----  -------
1     2     3
1111XX2222XX3333333
   1     2        3
   1              3
"""

def test_parse_table():
    """Parse the ``table1`` fixture and check the headers and every record."""
    expected_headers = ("col1", "c2", "column3")
    expected_records = [
        ("1", "2", "3"),
        ("1111", "2222", "3333333"),
        ("1", "2", "3"),
        ("1", "", "3"),
    ]
    headers, records = parse_table(table1)
    assert headers == expected_headers
    assert records == expected_records
	from typing import List, Tuple

	def parse_table(table: str) -> Tuple[Tuple, List[Tuple]]:
	    """
	    Split a fixed-width text table into its headers and its records.

	    Uses the second row of the table to determine column locations (via
	    ``_get_spans``), then slices the first row for the headers and every
	    row from the third onward for the records.

	    Returns a ``(headers, records)`` pair: ``headers`` is a tuple of strings
	    and ``records`` is a list of such tuples.
	    """
	    rows = table.splitlines()
	    spans = _get_spans(rows[1])
	    headers = _span_vals(rows[0], spans)
	    records = [_span_vals(r, spans) for r in rows[2:]]
	    return headers, records

	def _span_vals(row, spans):
	return tuple(row[start:end].strip() for start, end in spans)

	def _get_spans(row):
	    """
	    Return the ``(start, end)`` index pairs of the non-space runs in ``row``.

	    The row is walked one run of identical characters at a time, using
	    ``_find_next_not`` to locate where the current run stops. Runs of spaces
	    are skipped; every other run is recorded as a span.
	    """
	    state = ' '
	    spans = []
	    i = 0
	    while True:
	        new_i, new_state = _find_next_not(state, row, i)
	        if state != ' ':
	            spans.append((i, new_i))
	        i, state = new_i, new_state
	        # _find_next_not reports None as the next item at the end of the row.
	        if state is None:
	            return spans

	def _find_next_not(c, s, i):
	while True:
	try:
	if s[i] != c:
	return i, s[i]
	i += 1
	except IndexError:
	return i, None

	#######
	# Tests
	#######

	# Fixture for test_parse_table. The dashed second line defines three
	# columns, so the padding between values is significant. It is written as
	# adjacent string literals so that the indentation of this source block
	# cannot leak into the table text.
	table1 = (
	    "col1  c2    column3\n"
	    "----  ----  -------\n"
	    "1     2     3\n"
	    "1111XX2222XX3333333\n"
	    "   1     2        3\n"
	    "   1              3\n"
	)

	def test_parse_table():
	    """Parse the ``table1`` fixture and check the headers and every record."""
	    headers, records = parse_table(table1)
	    assert headers == ("col1", "c2", "column3")
	    assert records == [
	        ("1", "2", "3"),
	        ("1111", "2222", "3333333"),
	        ("1", "2", "3"),
	        ("1", "", "3"),
	    ]