Created
May 12, 2020 16:28
-
-
Save N-Coder/1035c6e8442d6f5f1bbd80a98c748d1d to your computer and use it in GitHub Desktop.
ics ContentLineParser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@attr.s(slots=True)
class ContentLineParser(object):
    """Parse one content line of the form ``NAME;PARAM=v1,v2:VALUE``.

    The line is split at its ``:`` and ``;`` delimiters. Everything before
    the first delimiter is the name, every ``;`` introduces a parameter with
    a comma-separated list of raw or double-quoted values, and everything
    after the first ``:`` that is not inside a quoted parameter value is the
    line's value.

    The character-class constants mirror the names used by the iCalendar
    content-line grammar (presumably RFC 5545 section 3.1 -- confirm against
    the spec version the project targets).

    Usage: ``ContentLineParser(line, line_nr).parse()`` returns the populated
    ``ContentLine``; ``check()`` may be called afterwards to sanity-check it.
    """

    CONTROL = "\x00-\x08\x0A-\x1F\x7F"  # All the controls except HTAB
    DQUOTE = "\""
    ESCAPE = "\\"
    # The three *_CHARS patterns are complete regexes ("[^...]*"); use them
    # with re.fullmatch, because a prefix match on "*" always succeeds.
    QSAFE_CHARS = "[^" + CONTROL + DQUOTE + "]*"
    SAFE_CHARS = "[^" + CONTROL + DQUOTE + ",:;]*"
    VALUE_CHARS = "[^" + CONTROL + "]*"
    # Raw string: "\-" is not a valid escape in a normal string literal.
    IDENTIFIER = r"[a-zA-Z0-9\-]+"

    # The text to parse and its line number (only used in error messages).
    line: str = attr.ib()
    line_nr: int = attr.ib(default=-1)

    # Parser state, populated by parse() and its helpers.
    # Iterator over every ":" / ";" occurrence in the line.
    delims: Iterator[Match[str]] = attr.ib(init=False)
    # The delimiter currently under consideration.
    delim: Match[str] = attr.ib(init=False)
    # The ContentLine being built.
    cl: ContentLine = attr.ib(init=False)
    # Index in `line` where the next parameter value starts.
    param_value_start: int = attr.ib(init=False)
    # Value list of the parameter currently being parsed (shared with cl.params).
    param_values: List[Union[str, QuotedParamValue]] = attr.ib(init=False)

    def error(self, msg: str, col1: int = -1, col2: int = -1) -> ValueError:
        """Build (but do not raise) a ValueError describing a parse problem.

        :param msg: description of the problem
        :param col1: optional column where the problem starts (-1 to omit)
        :param col2: optional column where the problem ends; only used when
            col1 is given as well
        :return: the exception, for the caller to raise
        """
        if col1 == -1:
            cols = ""
        elif col2 == -1:
            cols = f":{col1}"
        else:
            cols = f":{col1}-{col2}"
        return ValueError(f"Line {self.line_nr}{cols} {msg}: {self}")

    def next_delim(self) -> None:
        """Advance ``self.delim`` to the next ``:`` or ``;`` in the line.

        :raises ValueError: if no delimiter is left, i.e. the ``:`` that
            separates the name and params from the value was never found
        """
        try:
            self.delim = next(self.delims)
        except StopIteration:
            raise self.error("does not contain name-value separator ':'")

    def parse(self) -> ContentLine:
        """Parse ``self.line`` and return the resulting ``ContentLine``.

        :raises ValueError: if the line is malformed
        """
        self.delims = iter(re.finditer("[:;]", self.line))
        self.next_delim()
        name = self.line[:self.delim.start()]
        self.cl = ContentLine(name)
        while True:
            # Everything before delim.start() is already processed; reading
            # continues at delim.end().
            if self.delim.group() == ":":
                self.cl.value = self.line[self.delim.end():]
                return self.cl
            assert self.delim.group() == ";"
            # parse_param() leaves self.delim on the delimiter that follows
            # the parameter's value list.
            self.parse_param()

    def parse_param(self) -> None:
        """Parse one ``NAME=value[,value...]`` parameter following a ``;``.

        On return ``self.delim`` is the delimiter that ends the value list.

        :raises ValueError: if the parameter has no ``=``, an empty value, or
            a malformed quoted value
        """
        name_start = self.delim.end()
        try:
            param_delim = self.line.index("=", name_start)
        except ValueError:
            raise self.error("contains param without value", name_start)

        # Read comma-separated and possibly quoted param values.
        param_name = self.line[name_start:param_delim]
        self.cl.params[param_name] = self.param_values = []
        self.param_value_start = param_delim + 1
        self.next_delim()  # proceed to delim after param value list

        if self.delim.start() < param_delim:
            # The "=" we found lies beyond the next delimiter, so it belongs
            # to a later param or to the value; this param has no "=".
            raise self.error("contains param without value",
                             name_start, self.delim.start())

        has_further_param_value = True
        while has_further_param_value:
            if self.delim.start() <= self.param_value_start:
                raise self.error("contains param with an empty value",
                                 self.delim.start(), self.param_value_start)
            if self.line[self.param_value_start] == self.DQUOTE:
                has_further_param_value = self.parse_quoted_param_val()
            else:
                has_further_param_value = self.parse_raw_param_val()

    def parse_quoted_param_val(self) -> bool:
        """Parse a double-quoted param value starting at ``param_value_start``.

        Delimiters inside the quotes are skipped, so ``self.delim`` ends up
        on the first delimiter at or after the closing quote.

        :return: True if a comma follows and another value must be read,
            False if this was the last value of the param
        :raises ValueError: if the closing quote is missing or is followed by
            anything other than a comma or a delimiter
        """
        self.param_value_start += 1  # skip the opening quote
        try:
            param_quote_stop = self.line.index(self.DQUOTE, self.param_value_start)
        except ValueError:
            raise self.error("contains param missing a closing quote",
                             self.param_value_start)
        self.param_values.append(
            QuotedParamValue(self.line[self.param_value_start:param_quote_stop]))

        # Reposition the delims sequence, skipping any delimiter until right
        # after the closing quote.
        self.param_value_start = param_quote_stop + 1
        while self.delim.start() < self.param_value_start:
            self.next_delim()

        # Continue with whatever comes after the closing quote.
        if self.delim.start() == self.param_value_start:
            return False  # this was the last value for this param
        if self.line[self.param_value_start] == ",":
            self.param_value_start += 1  # skip the comma
            return True  # there's a next value following for this param
        raise self.error("contains param with content trailing after closing quote",
                         self.param_value_start, self.delim.start())

    def parse_raw_param_val(self) -> bool:
        """Parse an unquoted param value starting at ``param_value_start``.

        The value ends at the next comma or, failing that, at ``self.delim``.

        :return: True if a comma follows and another value must be read,
            False if this was the last value of the param
        """
        param_comma = self.line.find(",", self.param_value_start, self.delim.start())
        if param_comma < 0:
            self.param_values.append(self.line[self.param_value_start:self.delim.start()])
            return False  # this was the last value for this param
        self.param_values.append(self.line[self.param_value_start:param_comma])
        self.param_value_start = param_comma + 1
        return True  # there's a next value following for this param

    def check(self) -> None:
        """Sanity-check the parsed ``ContentLine`` against the character classes.

        Must be called after ``parse()``. Uses ``assert`` statements, so the
        checks are skipped when Python runs with ``-O``.

        :raises AssertionError: if the name, a param name, a param value or
            the value contains characters outside its allowed class
        """
        # fullmatch, not match: the "*" patterns match the empty prefix of
        # any string, so a prefix match could never fail.
        assert re.fullmatch(self.IDENTIFIER, self.cl.name), "invalid name"
        for key, vals in self.cl.params.items():
            assert re.fullmatch(self.IDENTIFIER, key), "invalid param name"
            for val in vals:
                if isinstance(val, QuotedParamValue):
                    assert re.fullmatch(self.QSAFE_CHARS, val.data), \
                        "invalid quoted param value"
                else:
                    # Raw values are plain strings and have no `.data`.
                    assert re.fullmatch(self.SAFE_CHARS, val), \
                        "invalid param value"
        assert re.fullmatch(self.VALUE_CHARS, self.cl.value), "invalid value"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment