Skip to content

Instantly share code, notes, and snippets.

@superlou
Created October 15, 2013 12:37
Show Gist options
  • Save superlou/6990947 to your computer and use it in GitHub Desktop.
Save superlou/6990947 to your computer and use it in GitHub Desktop.
Doxygen to .docx via python-docx
from lxml import etree as ET
import python_docx.docx as docx # requires LXML and PIL
def is_boring(text):
    """Return True when ``text`` contains a known boilerplate phrase.

    The match is a case-insensitive substring search: ``text`` is lower-cased
    once and compared against a fixed set of lower-case phrases.

    :param text: string to inspect; it must provide ``.lower()`` (callers are
        expected to filter out ``None`` beforehand).
    :returns: ``True`` if any boilerplate phrase occurs in ``text``,
        ``False`` otherwise.
    """
    phrases = (
        "data set forth herein",
        "this file was created by texas instruments",
        "ti checkin date",
        "contents:",
    )
    # Lower-case once rather than once per phrase.
    lowered = text.lower()
    return any(phrase in lowered for phrase in phrases)
class DocBody(object):
    """Wrapper around a document body that appends paragraphs to it.

    Paragraph elements are built with ``docx.paragraph`` and added with
    ``body.append``; each addition can optionally be echoed to stdout.
    """

    def __init__(self, body):
        """Store ``body``, the object paragraphs will be appended to."""
        self.body = body

    def append_paragraph(self, paratext, log=True):
        """Append a normal paragraph built from ``paratext``.

        :param paratext: value passed straight to ``docx.paragraph``.
        :param log: when true (the default), print a ``"< ..."`` trace line.
        """
        self.body.append(docx.paragraph(paratext))
        if log:
            # %-formatting copes with None, lists and (on Python 2) unicode.
            print("< %s" % (paratext,))

    def append_bullet(self, paratext, log=True):
        """Append a paragraph built from ``paratext`` with the 'ListBullet' style.

        :param paratext: value passed straight to ``docx.paragraph``.
        :param log: when true (the default), print a ``"< * ..."`` trace line.
        """
        self.body.append(docx.paragraph(paratext, style='ListBullet'))
        if log:
            # Formatting instead of "+" so a None/list paratext cannot raise
            # TypeError while merely logging.
            print("< * %s" % (paratext,))
def paragraph_writing_state_machine(body, paragraph):
    """Walk an XML element and append its content to ``body`` as paragraphs.

    The element tree under ``paragraph`` is traversed with ``ET.iterwalk``
    using "start"/"end" events. A string-named state machine reacts to
    "<action> <tag>" events (``para``, ``itemizedlist``, ``parameterlist``,
    ``simplesect``, ``ref``, ``ulink``, ``xrefsect`` ...) and writes plain
    paragraphs, bullets, a "Parameters" section and a "Returns" section
    through a ``DocBody`` wrapper. Events with no transition defined for the
    current state are ignored. Every event is also traced to stdout.

    :param body: document body handed to ``DocBody``; paragraphs are appended
        to it.
    :param paragraph: root XML element to walk.
    :raises ValueError: if an "end" event does not match the most recently
        opened tag.
    """
    state = "begin"
    context = ET.iterwalk(paragraph, events=("start", "end"))
    tag_stack = []
    text = []
    parameter_list = []
    parameter_name = ""
    parameter_description = ""
    doc_body = DocBody(body)

    for action, element in context:
        event = " ".join([action, element.tag])
        # Stay in the current state unless a transition below says otherwise;
        # this also keeps new_state defined when the very first event matches
        # nothing.
        new_state = state

        # Sanity-check nesting: every "end" must close the last opened tag.
        if action == "start":
            tag_stack.append(element.tag)
        if action == "end":
            popped_tag = tag_stack.pop()
            if popped_tag != element.tag:
                raise ValueError("Bad XML!")

        print(tag_stack)
        print("> %s (%s): %s !! %s" % (event, state, element.text, element.tail))

        if state == "begin":
            if event == "start para":
                text.append(element.text)
                new_state = "para"
            elif event == "start xrefsect":
                new_state = "xrefsect"
            elif event == "start parameterlist":
                # Start a fresh list for this parameter section.
                parameter_list = []
                new_state = "parameterlist"

        elif state == "para":
            if event == "start itemizedlist":
                # Emit the text collected so far before the bullets begin.
                doc_body.append_paragraph(text.pop())
                new_state = "itemizedlist"
            elif event == "end para":
                # Join the collected fragments, dropping missing text and
                # boilerplate phrases.
                # NOTE(review): `text` is not cleared after flushing; confirm
                # whether several sibling paragraphs are ever walked in one
                # call.
                output = "".join(
                    phrase for phrase in text
                    if phrase is not None and not is_boring(phrase)
                )
                if output:
                    doc_body.append_paragraph(output)
                new_state = "begin"
            elif event == "start parameterlist":
                parameter_list = []
                new_state = "parameterlist"
            elif event == "start xrefsect":
                new_state = "xrefsect"
            elif event == "start simplesect" and element.get('kind') == "rcs":
                new_state = "rcs simplesect"
            elif event == "start simplesect" and element.get('kind') == "note":
                new_state = "note simplesect"
            elif event == "start ref":
                text.append(element.text)
                new_state = "ref"
            elif event == "start simplesect" and element.get('kind') == "return":
                new_state = "return simplesect"
            elif event == "start ulink":
                # Inline link: keep its text and the text that follows it.
                text.append(element.text)
                if element.tail:
                    text.append(element.tail)
                new_state = "para"

        elif state == "xrefsect":
            # Content of an xrefsect is skipped entirely.
            if event == "end xrefsect":
                new_state = "para"

        elif state == "itemizedlist":
            if event == "start listitem":
                new_state = "listitem"
            elif event == "end itemizedlist":
                new_state = "begin"

        elif state == "listitem":
            if event == "start para":
                doc_body.append_bullet(element.text)
                new_state = "itemizedlist"

        elif state == "rcs simplesect":
            # Content of an "rcs" simplesect is skipped entirely.
            if event == "end simplesect":
                new_state = "para"

        elif state == "ref":
            if event == "end ref":
                # Text following the reference belongs to the paragraph.
                if element.tail:
                    text.append(element.tail)
                new_state = "para"

        elif state == "note simplesect":
            if event == "start para":
                # element.text is None for a para with no leading text.
                text.append("Note: " + (element.text or ""))
                new_state = "note simplesect para"
            elif event == "end simplesect":
                new_state = "begin"

        elif state == "note simplesect para":
            if event == "end para":
                output = text.pop()
                doc_body.append_paragraph(output)
                new_state = "note simplesect"

        elif state == "parameterlist":
            if event == "start parameteritem":
                new_state = "parameteritem"
            elif event == "end parameterlist":
                doc_body.append_paragraph([('Parameters', 'b')])
                # NOTE(review): each entry is passed as a flat list of
                # strings; confirm docx.paragraph accepts that form.
                for parameter in parameter_list:
                    doc_body.append_paragraph(
                        [parameter[0], ": ", parameter[1]])
                new_state = "para"

        elif state == "parameteritem":
            if event == "start parameternamelist":
                new_state = "parameternamelist"
            elif event == "start parameterdescription":
                new_state = "parameterdescription"
            elif event == "end parameteritem":
                parameter_list.append([parameter_name, parameter_description])
                new_state = "parameterlist"

        elif state == "parameternamelist":
            if event == "start parametername":
                parameter_name = element.text
                new_state = "parametername"
            elif event == "end parameternamelist":
                new_state = "parameteritem"

        elif state == "parametername":
            if event == "end parametername":
                new_state = "parameternamelist"

        elif state == "parameterdescription":
            if event == "start para":
                parameter_description = element.text
                new_state = "parameterdescription para"
            elif event == "end parameterdescription":
                new_state = "parameteritem"

        elif state == "parameterdescription para":
            if event == "end para":
                new_state = "parameterdescription"

        elif state == "return simplesect":
            if event == "start para":
                doc_body.append_paragraph([('Returns', 'b')])
                doc_body.append_paragraph(element.text)
                new_state = "return simplesect para"
            elif event == "end simplesect":
                new_state = "para"

        elif state == "return simplesect para":
            if event == "end para":
                new_state = "return simplesect"

        state = new_state
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment