Created
December 15, 2012 21:21
-
-
Save FZambia/4299340 to your computer and use it in GitHub Desktop.
Parse Apache-like configuration files and strings.
This is a modified version of the initial parser implementation described here: http://www.poldylicious.de/node/25
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# | |
# This is a modified version of the initial parser implementation
# described here: http://www.poldylicious.de/node/25
import re | |
class ApacheConfig(object):
    """
    Parse Apache-like configuration files and strings.

    Each instance is one node of the parsed tree. A node is either a
    section (``<name value ...> ... </name>``) holding child nodes, or a
    plain directive (``name value``). The parser returns an unnamed root
    section whose children are the top-level entries of the input.

    Attributes:
        name: section or directive name ('' for the root node).
        values: list of string arguments (always a list, possibly empty).
        children: child nodes, in input order.
        section: True for sections, False for directives.
        parent: the enclosing node, or None until ``add_child`` attaches it.
    """

    re_comment = re.compile(r"""^#.*$""")
    re_section_start = re.compile(r"""^<(?P<name>[^/\s>]+)\s*(?P<value>[^>]+)?>$""")
    re_section_end = re.compile(r"""^</(?P<name>[^\s>]+)\s*>$""")

    def __init__(self, name, values=None, section=False):
        """
        Create a node.

        ``values`` may be any iterable of strings or None; it is copied so
        that nodes never share one list (the former ``values=[]`` default
        was shared between every node created without arguments).
        """
        self.name = name
        self.children = []
        self.values = list(values) if values else []
        self.section = section
        # Filled in by add_child(); the root keeps None.
        self.parent = None

    def add_child(self, child):
        """
        Add child to children list and create reference to parent.

        Returns the child so callers can descend into it directly.
        """
        self.children.append(child)
        child.parent = self
        return child

    def find(self, path):
        """
        Return the first element which matches the path, or None.

        ``path`` is a '/'-separated list of node names relative to this
        node; leading and trailing slashes are ignored and an empty path
        returns this node itself.
        """
        pathelements = path.strip("/").split("/")
        if pathelements[0] == '':
            return self
        return self._find(pathelements)

    def _find(self, pathelements):
        """
        Depth-first search helper for ``find``.

        The list is sliced instead of popped: every sibling branch must
        see the same remaining path, otherwise a failed branch would eat
        path elements and later siblings would match too early.
        """
        if not pathelements:
            # No pathelements left, result is self.
            return self
        head, rest = pathelements[0], pathelements[1:]
        for child in self.children:
            if child.name == head:
                result = child._find(rest)
                if result is not None:
                    return result
        return None

    def findall(self, path):
        """
        Return all elements which match the path, as a list.

        Uses the same path syntax as ``find``; an empty path yields a
        one-element list containing this node.
        """
        pathelements = path.strip("/").split("/")
        if pathelements[0] == '':
            return [self]
        return self._findall(pathelements)

    def _findall(self, pathelements):
        """
        Recursive helper for ``findall``.

        As in ``_find`` the path is sliced, not mutated, so that every
        matching child is searched with the full remaining path.
        """
        if not pathelements:
            # No pathelements left, result is self.
            return [self]
        head, rest = pathelements[0], pathelements[1:]
        result = []
        for child in self.children:
            if child.name == head:
                result.extend(child._findall(rest))
        return result

    def print_r(self, indent=-1):
        """
        Recursively print node.

        Nodes at a negative indent (by default only the node the call
        starts on) are not printed themselves; each nesting level adds
        one space of indentation.
        """
        pad = " " * indent
        # Name followed by its arguments; no trailing blank when there are none.
        label = " ".join([self.name] + list(self.values or []))
        if self.section:
            if indent >= 0:
                # print() with one argument behaves the same on Python 2 and 3.
                print(pad + "<" + label + ">")
            for child in self.children:
                child.print_r(indent + 1)
            if indent >= 0:
                print(pad + "</" + self.name + ">")
        elif indent >= 0:
            print(pad + label)

    def traverse(self, res=None):
        """
        Recursively fill and return config dictionary.

        A section is stored as ``res[name][first_value]`` ('' when the
        section has no values) and that inner dict is returned; a later
        section with the same name and first value replaces the earlier
        one. A directive appends its values to the list ``res[name]`` and
        returns that list. Called on the root without arguments, the
        result is the dict of all top-level entries.

        ``res`` defaults to a fresh dict on every call (the former ``{}``
        default was shared between calls).
        """
        if res is None:
            res = {}
        if self.section:
            if self.name not in res:
                res[self.name] = {}
            key = self.values[0] if self.values else ""
            obj = res[self.name][key] = {}
            for child in self.children:
                child.traverse(res=obj)
        else:
            obj = res.setdefault(self.name, [])
            obj.extend(self.values or [])
        return obj

    @classmethod
    def parse_file(cls, file):
        """
        Parse a file.

        ``file`` is a path; the file is closed even when parsing raises.
        """
        with open(file) as f:
            return cls._parse(f)

    @classmethod
    def parse_string(cls, string):
        """
        Parse a string.
        """
        return cls._parse(string.splitlines())

    @classmethod
    def _parse(cls, itobj):
        """
        Parse an iterable of lines and return the root node.

        Blank lines and lines starting with '#' are skipped. Raises
        Exception when a closing tag does not match the open section.
        """
        root = node = cls('', section=True)
        for line in itobj:
            line = line.strip()
            if not line or cls.re_comment.match(line):
                continue

            match = cls.re_section_start.match(line)
            if match:
                # The value group is None for "<name>"; normalise to a list.
                values = (match.group("value") or "").split()
                new_node = cls(match.group("name"), values=values, section=True)
                node = node.add_child(new_node)
                continue

            match = cls.re_section_end.match(line)
            if match:
                if node.name != match.group("name"):
                    raise Exception(
                        "Section mismatch: %s should be %s" % (
                            match.group("name"),
                            node.name
                        )
                    )
                node = node.parent
                continue

            # Plain directive: the name, then the rest of the line as one
            # value. Splitting on any whitespace also accepts tabs.
            values = line.split(None, 1)
            name = values.pop(0)
            node.add_child(cls(name, values=values, section=False))
        return root
if __name__ == "__main__":
    import pprint

    # Demo: parse a small sample configuration and dump it as a dictionary.
    config_string = """
global 1
<domain mail>
value 10
value 3
<timeout>
critical 1
warning 2
info 3
</timeout>
</domain>
<domain google>
value 1
</domain>
"""
    parsed = ApacheConfig.parse_string(config_string)
    as_dict = parsed.traverse()
    pprint.pprint(as_dict)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment