Skip to content

Instantly share code, notes, and snippets.

@nathanalderson
Created September 19, 2019 19:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nathanalderson/a94ce4a999044ee4a3aafe974c1921d2 to your computer and use it in GitHub Desktop.
Save nathanalderson/a94ce4a999044ee4a3aafe974c1921d2 to your computer and use it in GitHub Desktop.
Parse a certain type of text-based table in Python
from typing import List, Tuple
def parse_table(table: str) -> Tuple[Tuple, List[Tuple]]:
    """
    Split a fixed-width text table into a header tuple and a list of record tuples.

    The second line of ``table`` (the underline row) defines where each column starts and
    ends. The first line is sliced at those positions to produce the headers, and every line
    after the second is sliced the same way to produce one record each. Cells are
    whitespace-stripped. See the testcase at the bottom.
    """
    lines = table.splitlines()
    # Column boundaries come from the underline row only, never from the data rows.
    column_spans = _get_spans(lines[1])
    headers = _span_vals(lines[0], column_spans)
    records = []
    for line in lines[2:]:
        records.append(_span_vals(line, column_spans))
    return headers, records
def _span_vals(row, spans):
return tuple(row[start:end].strip() for start, end in spans)
def _get_spans(row):
    """
    Return the ``(start, end)`` index pairs of every run of identical non-space characters
    in ``row``, in left-to-right order. Runs of spaces are skipped.
    """
    found = []
    pos, current = 0, ' '
    # ``current`` is the character of the run being scanned; the helper reports None as the
    # next character once the end of ``row`` is reached, which ends the loop.
    while current is not None:
        next_pos, next_char = _find_next_not(current, row, pos)
        if current != ' ':
            found.append((pos, next_pos))
        pos, current = next_pos, next_char
    return found
def _find_next_not(c, s, i):
while True:
try:
if s[i] != c:
return i, s[i]
i += 1
except IndexError:
return i, None
#######
# Tests
#######
# Fixture for test_parse_table. Columns are fixed-width: the dash runs on the second line
# mark each column's extent (0-4, 6-10, 12-19), so every cell must sit under its dashes.
# The "XX" row fills the two-character gutters to check they are excluded from the cells,
# and the last row leaves the middle column blank.
table1 = """\
col1  c2    column3
----  ----  -------
1     2     3
1111XX2222XX3333333
1     2     3
1           3
"""
def test_parse_table():
    """Parse the ``table1`` fixture and compare the headers and records with the expected tuples."""
    expected_headers = ("col1", "c2", "column3")
    expected_records = [
        ("1", "2", "3"),
        ("1111", "2222", "3333333"),
        ("1", "2", "3"),
        ("1", "", "3"),
    ]

    headers, records = parse_table(table1)

    assert headers == expected_headers
    assert records == expected_records
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment