Skip to content

Instantly share code, notes, and snippets.

@FZambia
Created December 15, 2012 21:21
Show Gist options
  • Save FZambia/4299340 to your computer and use it in GitHub Desktop.
Save FZambia/4299340 to your computer and use it in GitHub Desktop.
Parse Apache-like configuration files and strings. This is a modified version of the initial parser implementation described here: http://www.poldylicious.de/node/25
# coding: utf-8
#
# This is a modified version of the initial parser implementation
# described here: http://www.poldylicious.de/node/25
import re
class ApacheConfig(object):
    """
    Parse Apache-like configuration files and strings.

    Every instance is one node of the configuration tree: either a
    section (``<name value ...> ... </name>``) that owns child nodes, or a
    plain directive (``name value``).  The object returned by the parse
    methods is an unnamed root section holding the top-level nodes.

    Attributes:
        name: section or directive name ('' for the root node).
        values: list of value strings (never None).
        section: True for sections, False for directives.
        children: child nodes in the order they were added.
        parent: enclosing node, or None when the node has no parent.
    """

    # Lines are stripped before matching, so a comment is any line that
    # starts with '#'.
    re_comment = re.compile(r"""^#.*$""")
    # "<name>" or "<name value ...>"; a leading '/' is excluded so closing
    # tags never match here.
    re_section_start = re.compile(r"""^<(?P<name>[^/\s>]+)\s*(?P<value>[^>]+)?>$""")
    # "</name>"
    re_section_end = re.compile(r"""^</(?P<name>[^\s>]+)\s*>$""")

    def __init__(self, name, values=None, section=False):
        """
        Create a node.

        ``values`` may be omitted or None; both mean "no values".  A fresh
        list is created in that case so nodes never share one list object.
        """
        self.name = name
        self.children = []
        self.values = [] if values is None else values
        self.section = section
        # Set by add_child(); stays None for the root node.
        self.parent = None

    def add_child(self, child):
        """
        Add child to children list and create reference to parent.

        Returns the child so calls can be chained.
        """
        self.children.append(child)
        child.parent = self
        return child

    def find(self, path):
        """
        Return the first element which matches the path.

        ``path`` is a '/'-separated list of node names relative to this
        node; leading and trailing slashes are ignored.  An empty path
        returns this node.  Returns None when nothing matches.
        """
        pathelements = path.strip("/").split("/")
        if pathelements[0] == '':
            return self
        return self._find(pathelements)

    def _find(self, pathelements):
        """
        Depth-first search for the first node matching ``pathelements``.

        The list is never mutated: each level receives its own tail, so a
        failed descent into one child cannot shorten the path seen by the
        next sibling.
        """
        if not pathelements:
            # no pathelements left, result is self
            return self
        head, tail = pathelements[0], pathelements[1:]
        for child in self.children:
            if child.name == head:
                result = child._find(tail)
                if result is not None:
                    return result
        return None

    def findall(self, path):
        """
        Return all elements which match the path.

        Path syntax is the same as for find().  An empty path returns a
        list containing only this node; no match returns an empty list.
        """
        pathelements = path.strip("/").split("/")
        if pathelements[0] == '':
            return [self]
        return self._findall(pathelements)

    def _findall(self, pathelements):
        """
        Collect every node matching ``pathelements`` in document order.

        As in _find(), the list is sliced rather than popped so all
        matching siblings are searched with the same remaining path.
        """
        if not pathelements:
            # no pathelements left, result is self
            return [self]
        head, tail = pathelements[0], pathelements[1:]
        result = []
        for child in self.children:
            if child.name == head:
                result.extend(child._findall(tail))
        return result

    def print_r(self, indent=-1):
        """
        Recursively print node.

        Nodes at a negative indent (by default only the node print_r is
        called on) are not printed themselves; their children start at
        indent 0.  Name and values are joined with single spaces, so a
        node without values is printed without a trailing space.
        """
        text = " ".join([self.name] + list(self.values))
        prefix = " " * indent
        if self.section:
            if indent >= 0:
                print(prefix + "<" + text + ">")
            for child in self.children:
                child.print_r(indent + 1)
            if indent >= 0:
                print(prefix + "</" + self.name + ">")
        elif indent >= 0:
            print(prefix + text)

    def traverse(self, res=None):
        """
        Recursively fill and return config dictionary.

        A section is stored as ``res[name][first_value]`` ('' is used as
        the key when the section has no values) and its children are
        traversed into that inner dictionary; a later section with the
        same name and first value replaces an earlier one.  A directive is
        stored as ``res[name]``, a list extended with the values of every
        directive of that name.

        ``res`` is the dictionary to fill; a new one is created per call
        when omitted.  Returns the object created or updated for this
        node: the inner dictionary for a section, the list for a
        directive.
        """
        if res is None:
            res = {}
        if self.section:
            by_value = res.setdefault(self.name, {})
            key = self.values[0] if self.values else ""
            obj = by_value[key] = {}
            for child in self.children:
                child.traverse(res=obj)
        else:
            obj = res.setdefault(self.name, [])
            if self.values:
                obj.extend(self.values)
        return obj

    @classmethod
    def parse_file(cls, file):
        """
        Parse a file.

        ``file`` is a path; the file is closed even when parsing raises.
        """
        with open(file) as f:
            return cls._parse(f)

    @classmethod
    def parse_string(cls, string):
        """
        Parse a string.
        """
        return cls._parse(string.splitlines())

    @classmethod
    def _parse(cls, itobj):
        """
        Parse iterable object and return root ApacheConfig object.

        ``itobj`` yields the configuration one line at a time.  Blank
        lines and lines starting with '#' are skipped.  Raises ValueError
        when a closing tag does not match the currently open section.
        Sections still open at the end of the input are accepted as-is.
        """
        root = node = cls('', section=True)
        for line in itobj:
            line = line.strip()
            if not line or cls.re_comment.match(line):
                continue

            match = cls.re_section_start.match(line)
            if match:
                raw_values = match.group("value")
                values = raw_values.split() if raw_values else []
                # Descend: following lines belong to the new section.
                node = node.add_child(
                    cls(match.group("name"), values=values, section=True)
                )
                continue

            match = cls.re_section_end.match(line)
            if match:
                if node.name != match.group("name"):
                    raise ValueError(
                        "Section mismatch: %s should be %s" % (
                            match.group("name"),
                            node.name
                        )
                    )
                node = node.parent
                continue

            # Plain directive: the first token is the name, the rest of
            # the line (if any) is kept as a single value.  Splitting on
            # any whitespace run also handles tabs and repeated spaces.
            values = line.split(None, 1)
            name = values.pop(0)
            node.add_child(cls(name, values=values, section=False))
        return root
if __name__ == "__main__":
    from pprint import pprint

    # Demo: parse a small sample configuration (one top-level directive,
    # two "domain" sections, one nested "timeout" section) and
    # pretty-print the dictionary built by traverse().
    sample = """
global 1
<domain mail>
value 10
value 3
<timeout>
critical 1
warning 2
info 3
</timeout>
</domain>
<domain google>
value 1
</domain>
"""
    parsed = ApacheConfig.parse_string(sample)
    pprint(parsed.traverse())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment