Skip to content

Instantly share code, notes, and snippets.

@sochi
Last active May 25, 2019 21:28
Show Gist options
  • Save sochi/4df20de7814462ee8255f9b424fbb2c0 to your computer and use it in GitHub Desktop.
Save sochi/4df20de7814462ee8255f9b424fbb2c0 to your computer and use it in GitHub Desktop.
Short CSV parser in Python
from typing import Iterable, List
def parse_line(line: str) -> List[str]:
    """Extract the quoted fields from one line of CSV-like text.

    A field starts at a single or double quote and ends at the next
    unescaped occurrence of the same quote character. Inside a field a
    backslash makes the following character literal, which allows the
    closing quote or a backslash itself to appear in the value.

    Everything outside of quotes (separators, blanks, unquoted text) is
    ignored, and a field whose closing quote is missing is discarded.

    Args:
        line: The text to scan.

    Returns:
        The contents of the quoted fields, in order of appearance, without
        their surrounding quotes and with escapes resolved.
    """
    words: List[str] = []
    # Characters of the field currently being read; joined once on close
    # instead of growing a string one character at a time.
    chars: List[str] = []
    # Quote that opened the current field, or None while outside a field.
    quote_character = None
    # True when the previous character inside a field was a backslash.
    escaped = False
    for ch in line:
        if escaped:
            # Escaped character: take it literally, whatever it is.
            chars.append(ch)
            escaped = False
        elif quote_character is not None:
            if ch == "\\":
                escaped = True
            elif ch == quote_character:
                # Matching closing quote ends the field.
                words.append("".join(chars))
                chars = []
                quote_character = None
            else:
                # Includes the *other* quote character, which is plain
                # text inside a field.
                chars.append(ch)
        elif ch in ("'", '"'):
            quote_character = ch
        # Any other character outside a field is skipped.
    return words
def parse_csv(lines: Iterable[str]) -> Iterable[List[str]]:
    """Lazily parse every line of *lines* with ``parse_line``.

    This is a generator: each input line is parsed only when the
    corresponding result is requested.

    Args:
        lines: The lines to parse, one record per item.

    Yields:
        The list of fields returned by ``parse_line`` for each input line,
        in input order.
    """
    for line in lines:
        yield parse_line(line)
# ---------------------------------------------------------------------------
# Self-check: run the parser over a small sample and verify the result.
# ---------------------------------------------------------------------------

# Sample lines mixing both quote styles, a trailing comma, an escaped quote,
# an escaped backslash and a blank after the separator.
example_input = """
'hello',"world"
"q'uoted",'with"parentheses',
'with"in\\'between',"la\\\\st",'words'
"works", 'also with blank spaces'
""".strip()

# Feed the sample to the parser one line at a time.
output = parse_csv(example_input.splitlines())

# One list of fields per sample line, quotes removed and escapes resolved.
_expected_rows = [
    ["hello", "world"],
    ["q'uoted", 'with"parentheses'],
    ["with\"in'between", "la\\st", "words"],
    ["works", "also with blank spaces"],
]

# list() consumes the parser output so it can be compared as a whole.
assert list(output) == _expected_rows
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment