Created
October 15, 2013 12:37
-
-
Save superlou/6990947 to your computer and use it in GitHub Desktop.
Doxygen to .docx via python-docx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from lxml import etree as ET | |
import python_docx.docx as docx # requires LXML and PIL | |
def is_boring(text):
    """Return True if *text* contains any known boilerplate phrase.

    The match is a case-insensitive substring search: *text* is lower-cased
    and each (already lower-case) phrase is looked up inside it.

    Args:
        text: The string to inspect. It must provide ``.lower()``.

    Returns:
        True when at least one boilerplate phrase occurs in *text*,
        False otherwise.
    """
    phrases = (
        "data set forth herein",
        "this file was created by texas instruments",
        "ti checkin date",
        "contents:",
    )
    # Lower-case once up front rather than once per phrase.
    lowered = text.lower()
    return any(phrase in lowered for phrase in phrases)
class DocBody(object):
    """Append python-docx paragraphs to a document body, with optional tracing.

    Each ``append_*`` method builds a paragraph with ``docx.paragraph`` and
    appends it to the wrapped body. When ``log`` is true, the appended
    content is also echoed to stdout prefixed with ``< ``.
    """

    def __init__(self, body):
        """Store *body*; it only needs an ``append`` method."""
        self.body = body

    def append_paragraph(self, paratext, log=True):
        """Append a plain paragraph built from *paratext*.

        Args:
            paratext: Whatever ``docx.paragraph`` accepts; callers in this
                file pass either a string or a list of run specs.
            log: When true (the default), echo the content to stdout.
        """
        self.body.append(docx.paragraph(paratext))
        if log:
            # %-formatting copes with strings, lists and None alike; the
            # one-element tuple stops a tuple argument from being unpacked.
            print("< %s" % (paratext,))

    def append_bullet(self, paratext, log=True):
        """Append a paragraph styled ``ListBullet`` built from *paratext*.

        Args:
            paratext: Whatever ``docx.paragraph`` accepts.
            log: When true (the default), echo the content to stdout.
        """
        self.body.append(docx.paragraph(paratext, style='ListBullet'))
        if log:
            # Formatting (not "+") so a None or list value cannot raise here.
            print("< * %s" % (paratext,))
def paragraph_writing_state_machine(body, paragraph):
    """Walk an XML element and write its content to *body* as docx paragraphs.

    The element is traversed with ``ET.iterwalk`` using start/end events. A
    string-valued state machine decides, per event, whether to collect text,
    emit a paragraph or bullet, or collect parameter name/description pairs.
    A trace of the tag stack and of each event is printed to stdout.

    Args:
        body: Container handed to ``DocBody``; emitted paragraphs are
            appended to it.
        paragraph: lxml element to walk (presumably a Doxygen XML ``<para>``
            node; confirm against the caller).

    Raises:
        ValueError: If an end event does not match the most recently opened
            tag.
    """
    state = "begin"
    context = ET.iterwalk(paragraph, events=("start", "end"))
    tag_stack = []
    text = []
    parameter_list = []
    parameter_name = ""
    parameter_description = ""
    docBody = DocBody(body)

    for action, element in context:
        event = " ".join([action, element.tag])
        # Stay in the current state unless a transition below says otherwise.
        # Without this default, a first event that matches no transition left
        # new_state unbound.
        new_state = state

        if action == "start":
            tag_stack.append(element.tag)
        if action == "end":
            popped_tag = tag_stack.pop()
            if popped_tag != element.tag:
                raise ValueError("Bad XML!")

        # NOTE(review): the source's indentation was lost; both trace prints
        # are assumed to run for every event.
        print(tag_stack)
        print("> %s (%s): %s !! %s" % (event, state, element.text, element.tail))

        if state == "begin":
            if event == "start para":
                text.append(element.text)
                new_state = "para"
            elif event == "start xrefsect":
                new_state = "xrefsect"
            elif event == "start parameterlist":
                # Start each parameter list empty (was a misspelled name).
                parameter_list = []
                new_state = "parameterlist"

        elif state == "para":
            if event == "start itemizedlist":
                # Emit the most recent text fragment before the list starts.
                if text:
                    docBody.append_paragraph(text.pop())
                new_state = "itemizedlist"
            elif event == "end para":
                output = "".join(
                    phrase for phrase in text
                    if phrase is not None and not is_boring(phrase)
                )
                if output:
                    docBody.append_paragraph(output)
                # Drop emitted fragments so a later paragraph does not
                # repeat them.
                text = []
                new_state = "begin"
            elif event == "start parameterlist":
                parameter_list = []
                new_state = "parameterlist"
            elif event == "start xrefsect":
                new_state = "xrefsect"
            elif event == "start simplesect":
                kind = element.get('kind')
                if kind == "rcs":
                    new_state = "rcs simplesect"
                elif kind == "note":
                    new_state = "note simplesect"
                elif kind == "return":
                    new_state = "return simplesect"
            elif event == "start ref":
                text.append(element.text)
                new_state = "ref"
            elif event == "start ulink":
                text.append(element.text)
                if element.tail:
                    text.append(element.tail)
                new_state = "para"

        elif state == "xrefsect":
            # Everything inside an xrefsect is skipped.
            if event == "end xrefsect":
                new_state = "para"

        elif state == "itemizedlist":
            if event == "start listitem":
                new_state = "listitem"
            elif event == "end itemizedlist":
                new_state = "begin"

        elif state == "listitem":
            if event == "start para":
                # element.text is None when the para starts with a child
                # element; emit an empty bullet instead of failing.
                docBody.append_bullet(element.text or "")
                new_state = "itemizedlist"

        elif state == "rcs simplesect":
            # Content of an rcs section is skipped.
            if event == "end simplesect":
                new_state = "para"

        elif state == "ref":
            if event == "end ref":
                if element.tail:
                    text.append(element.tail)
                new_state = "para"

        elif state == "note simplesect":
            if event == "start para":
                text.append("Note: " + (element.text or ""))
                new_state = "note simplesect para"
            elif event == "end simplesect":
                # NOTE(review): the rcs and return sections go back to
                # "para" here, this one goes to "begin" - confirm intent.
                new_state = "begin"

        elif state == "note simplesect para":
            if event == "end para":
                output = text.pop()
                docBody.append_paragraph(output)
                new_state = "note simplesect"

        elif state == "parameterlist":
            if event == "start parameteritem":
                new_state = "parameteritem"
            elif event == "end parameterlist":
                docBody.append_paragraph([('Parameters', 'b')])
                for parameter in parameter_list:
                    docBody.append_paragraph([parameter[0], ": ", parameter[1]])
                new_state = "para"

        elif state == "parameteritem":
            if event == "start parameternamelist":
                new_state = "parameternamelist"
            elif event == "start parameterdescription":
                new_state = "parameterdescription"
            elif event == "end parameteritem":
                parameter_list.append([parameter_name, parameter_description])
                new_state = "parameterlist"

        elif state == "parameternamelist":
            if event == "start parametername":
                parameter_name = element.text
                new_state = "parametername"
            elif event == "end parameternamelist":
                new_state = "parameteritem"

        elif state == "parametername":
            if event == "end parametername":
                new_state = "parameternamelist"

        elif state == "parameterdescription":
            if event == "start para":
                parameter_description = element.text
                new_state = "parameterdescription para"
            elif event == "end parameterdescription":
                new_state = "parameteritem"

        elif state == "parameterdescription para":
            if event == "end para":
                new_state = "parameterdescription"

        elif state == "return simplesect":
            if event == "start para":
                docBody.append_paragraph([('Returns', 'b')])
                docBody.append_paragraph(element.text)
                new_state = "return simplesect para"
            elif event == "end simplesect":
                new_state = "para"

        elif state == "return simplesect para":
            if event == "end para":
                new_state = "return simplesect"

        state = new_state
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment