Skip to content

Instantly share code, notes, and snippets.

@N-Coder
Created May 12, 2020 16:28
Show Gist options
  • Save N-Coder/1035c6e8442d6f5f1bbd80a98c748d1d to your computer and use it in GitHub Desktop.
Save N-Coder/1035c6e8442d6f5f1bbd80a98c748d1d to your computer and use it in GitHub Desktop.
ics ContentLineParser
@attr.s(slots=True)
class ContentLineParser(object):
    """Parser that splits one content line into name, params and value.

    The expected shape is ``NAME[;PARAM=value[,value...]]...:VALUE`` where a
    param value may be wrapped in double quotes, in which case it may contain
    the delimiters ``:``, ``;`` and ``,``.

    Call :meth:`parse` to build the ``ContentLine``; afterwards :meth:`check`
    can be used to validate the characters of the parsed parts.  The constant
    names appear to follow the content-line grammar of RFC 5545.
    """

    CONTROL = "\x00-\x08\x0A-\x1F\x7F"  # All the controls except HTAB
    DQUOTE = "\""
    ESCAPE = "\\"
    # The following are regex fragments; CONTROL is spliced into the
    # character classes as a list of ranges.
    QSAFE_CHARS = "[^" + CONTROL + DQUOTE + "]*"
    SAFE_CHARS = "[^" + CONTROL + DQUOTE + ",:;]*"
    VALUE_CHARS = "[^" + CONTROL + "]*"
    IDENTIFIER = r"[a-zA-Z0-9\-]+"

    # the raw content line to parse
    line: str = attr.ib()
    # line number, only used in error messages; -1 is the default
    line_nr: int = attr.ib(default=-1)
    # iterator over all ':' and ';' occurrences in `line`
    delims: Iterator[Match[str]] = attr.ib(init=False)
    # the delimiter currently being looked at
    delim: Match[str] = attr.ib(init=False)
    # the ContentLine under construction
    cl: ContentLine = attr.ib(init=False)
    # index in `line` where the next param value starts
    param_value_start: int = attr.ib(init=False)
    # value list of the param currently being parsed (shared with cl.params)
    param_values: List[Union[str, QuotedParamValue]] = attr.ib(init=False)

    def error(self, msg: str, col1: int = -1, col2: int = -1) -> ValueError:
        """Build (but do not raise) a ``ValueError`` describing a parse problem.

        :param msg: description that is placed after the line/column prefix
        :param col1: column of the problem, or -1 to omit column information
        :param col2: end column of the problem, or -1 for a single column;
            ignored when ``col1`` is -1
        :return: the exception, ready to be raised by the caller
        """
        if col1 == -1:
            cols = ""
        elif col2 == -1:
            cols = ":%s" % (col1,)
        else:
            cols = ":%s-%s" % (col1, col2)
        # Format the raw line rather than `self`: the generated repr reads the
        # init=False attributes, which may still be unset when an error occurs.
        return ValueError("Line %s%s %s: %s" % (self.line_nr, cols, msg, self.line))

    def next_delim(self) -> None:
        """Advance ``self.delim`` to the next ':' or ';' in the line.

        :raises ValueError: if there is no further delimiter
        """
        try:
            self.delim = next(self.delims)
        except StopIteration:
            raise self.error("does not contain name-value separator ':'") from None

    def parse(self) -> ContentLine:
        """Parse ``self.line`` and return the resulting ``ContentLine``.

        Everything before the first delimiter is the name, every ';' starts a
        param and everything after the first ':' outside of a quoted param
        value is the value.

        :raises ValueError: if the line is malformed
        """
        self.delims = re.finditer("[:;]", self.line)
        self.next_delim()
        self.cl = ContentLine(self.line[:self.delim.start()])
        # Invariant: everything before delim.start() is already processed and
        # reading continues at delim.end(). The pattern only yields ':' or ';',
        # so any delimiter that is not ':' introduces a param.
        while self.delim.group() != ":":
            self.parse_param()
        self.cl.value = self.line[self.delim.end():]
        return self.cl

    def parse_param(self) -> None:
        """Parse one ``NAME=value[,value...]`` param starting after ``self.delim``.

        The values are stored as a list in ``self.cl.params[NAME]`` and
        ``self.delim`` is left at the delimiter that follows the param.

        :raises ValueError: if the param has no '=', has an empty value or
            contains a malformed quoted value
        """
        name_start = self.delim.end()
        try:
            param_delim = self.line.index("=", name_start)
        except ValueError:
            raise self.error("contains param without value", name_start) from None
        self.next_delim()  # proceed to delim after param value list
        if param_delim > self.delim.start():
            # The '=' that was found lies behind the next delimiter, so it
            # belongs to a later param or to the value, not to this param.
            raise self.error("contains param without value", name_start, self.delim.start())

        # read comma-separated and possibly quoted param values
        param_name = self.line[name_start:param_delim]
        self.cl.params[param_name] = self.param_values = []
        self.param_value_start = param_delim + 1
        has_further_param_value = True
        while has_further_param_value:
            if self.delim.start() <= self.param_value_start:
                raise self.error("contains param with an empty value",
                                 self.delim.start(), self.param_value_start)
            if self.line[self.param_value_start] == self.DQUOTE:
                has_further_param_value = self.parse_quoted_param_val()
            else:
                has_further_param_value = self.parse_raw_param_val()

    def parse_quoted_param_val(self) -> bool:
        """Parse the double-quoted param value at ``self.param_value_start``.

        Delimiters inside the quotes are skipped, so ``self.delim`` ends up at
        the first delimiter after the closing quote.

        :return: True if a comma (and thus another value) follows the closing
            quote, False if the delimiter follows directly
        :raises ValueError: if the closing quote is missing, no delimiter
            follows, or other content trails the closing quote
        """
        self.param_value_start += 1  # skip the quote
        try:
            param_quote_stop = self.line.index(self.DQUOTE, self.param_value_start)
        except ValueError:
            raise self.error("contains param missing a closing quote",
                             self.param_value_start) from None
        self.param_values.append(QuotedParamValue(self.line[self.param_value_start:param_quote_stop]))

        # reposition the delims sequence, skipping any delimiter until right after the closing quote
        self.param_value_start = param_quote_stop + 1
        while self.delim.start() < self.param_value_start:
            self.next_delim()

        # continue with whatever comes after the closing quote
        if self.delim.start() == self.param_value_start:
            return False  # this was the last value for this param
        if self.line[self.param_value_start] == ",":
            self.param_value_start += 1  # skip the comma and continue with the next param value
            return True  # there's a next value following for this param
        raise self.error("contains param with content trailing after closing quote",
                         self.param_value_start, self.delim.start())

    def parse_raw_param_val(self) -> bool:
        """Parse the unquoted param value at ``self.param_value_start``.

        The value ends at the next comma or, if there is none before it, at
        ``self.delim``.

        :return: True if the value was terminated by a comma (another value
            follows), False if it was the last value of this param
        """
        param_comma = self.line.find(",", self.param_value_start, self.delim.start())
        if param_comma < 0:
            self.param_values.append(self.line[self.param_value_start:self.delim.start()])
            return False  # this was the last value for this param
        self.param_values.append(self.line[self.param_value_start:param_comma])
        self.param_value_start = param_comma + 1
        return True  # there's a next value following for this param

    def check(self) -> None:
        """Validate the characters of the parsed name, params and value.

        Must be called after :meth:`parse`.  Each part has to match its
        pattern completely, not just with a prefix.

        NOTE: validation uses ``assert`` and is therefore skipped when Python
        runs with ``-O``.

        :raises AssertionError: if any part contains a disallowed character
        """
        assert re.fullmatch(self.IDENTIFIER, self.cl.name), \
            "invalid name %r" % (self.cl.name,)
        for key, vals in self.cl.params.items():
            assert re.fullmatch(self.IDENTIFIER, key), \
                "invalid param name %r" % (key,)
            for val in vals:
                if isinstance(val, QuotedParamValue):
                    # str() yields the text for a str subclass as well as for
                    # a UserString-like wrapper -- TODO confirm against the
                    # definition of QuotedParamValue
                    assert re.fullmatch(self.QSAFE_CHARS, str(val)), \
                        "invalid quoted value %r for param %r" % (val, key)
                else:
                    assert re.fullmatch(self.SAFE_CHARS, val), \
                        "invalid value %r for param %r" % (val, key)
        assert re.fullmatch(self.VALUE_CHARS, self.cl.value), \
            "invalid value %r" % (self.cl.value,)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment