Skip to content

Instantly share code, notes, and snippets.

Created August 27, 2012 07:18
Extract the text of notes from an OpenOffice presentation
from lxml import etree
import zipfile
# XML namespaces used by OpenDocument presentation content.
# DRAW and PRES were referenced below without ever being defined, which
# raised NameError at import time; they are the standard OpenDocument
# "drawing" and "presentation" namespace URIs.
DRAW = "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
PRES = "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"

# Clark-notation prefixes ("{uri}") for spelling out qualified
# attribute/element names, e.g. DRAWC + "name".
DRAWC = "{%s}" % DRAW
PRESC = "{%s}" % PRES

# Prefix -> URI map handed to xpath() calls as ``namespaces=``.
NS = {
    "presentation": PRES,
    "draw": DRAW,
}
def getTree(path):
    """Parse an XML document and return its root element.

    path -- the XML document text (str or bytes) to parse.

    NOTE(review): the original return expression was cut off right after
    ``etree.XML(``.  Passing the argument straight through is the minimal
    completion; the script's entry point hands this function a variable
    named ``content``, which suggests document text rather than a
    filesystem path despite the parameter name.  Confirm against the
    upstream script.
    """
    return etree.XML(path)
def printNotes(slides):
    """Print the notes text of every slide to standard output.

    For each element in *slides* this prints a ``---- <name> ----`` header
    built from the element's ``draw:name`` attribute, then, for each
    ``presentation:notes`` child, the non-empty ``.text`` of every
    descendant (one per line), and finally a line holding a single space
    as a separator.

    slides -- iterable of elements offering ``get()`` and ``xpath()``,
              such as the list returned by getSlides().
    """
    for slide in slides:
        title = slide.get(DRAWC + "name")
        # Single-argument print(...) behaves the same as a statement on
        # Python 2 and as a function call on Python 3.
        print("---- %s ----" % title)
        for note in slide.xpath("presentation:notes", namespaces=NS):
            # .text is None or "" for descendants without leading text;
            # those are skipped so join() only sees real strings.
            # NOTE(review): only .text is collected, so text that follows
            # an inline child element (its .tail) is not printed.
            bits = [c.text for c in note.iterdescendants() if c.text]
            print("\n".join(bits))
        print(" ")
def getSlides(et):
    """Return the result of evaluating the XPath ``//draw:page`` on *et*.

    et -- an element supporting ``xpath()``; the ``draw`` prefix is
          resolved through the module-level NS mapping.
    """
    page_query = "//draw:page"
    return et.xpath(page_query, namespaces=NS)
# Command-line entry point: print the notes of the presentation file named
# by the first argument.
if __name__ == "__main__":
    from sys import argv

    # Fail with a usage message instead of an IndexError when the file
    # argument is missing.
    if len(argv) < 2:
        raise SystemExit("usage: %s PRESENTATION_FILE" % argv[0])
    filepath = argv[1]

    # The presentation file is opened as a zip archive; the ``with`` block
    # closes it again once the member has been read.
    # NOTE(review): the original assignment was truncated to
    # ``content ="content.xml")``; reading the "content.xml" member into
    # memory is the presumed intent -- confirm against the upstream script.
    with zipfile.ZipFile(filepath) as z:
        content = z.read("content.xml")

    et = getTree(content)
    slides = getSlides(et)
    # The original computed ``slides`` and then discarded it, so the script
    # produced no output; printing the notes is what printNotes() is for.
    printNotes(slides)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment