Skip to content

Instantly share code, notes, and snippets.

@nathans
Created November 12, 2010 23:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nathans/674883 to your computer and use it in GitHub Desktop.
Save nathans/674883 to your computer and use it in GitHub Desktop.
SBLGNT parser, basic framework for openscriptures import
#!/usr/bin/env python
import xml.sax
class SBLGNTParser(xml.sax.handler.ContentHandler):
    """SAX content handler for the SBL GNT XML file.

    The handler keeps one "currently inside this element" flag per element
    it knows about and forwards character data to ``parent.add_token``
    whenever at least one of those flags is set.

    ``parent`` can be any object that exposes an ``add_token(data)`` method.
    """

    # Element name -> name of the instance flag that records "inside it".
    _ELEMENT_FLAGS = {
        "book": "in_book",
        "title": "in_book_title",
        "verse-number": "in_verse",
        "w": "in_word",
        "suffix": "in_suffix",
    }

    def __init__(self, parent):
        """Create a handler that reports character data to ``parent``."""
        # Explicit base-class call (rather than super()) so the class also
        # works where ContentHandler is an old-style class (Python 2).
        xml.sax.handler.ContentHandler.__init__(self)
        self.init_meta_data()
        self.parent = parent

    def init_meta_data(self):
        """Reset every element-tracking flag to 0 ("not inside")."""
        self.in_book = 0
        self.in_book_title = 0
        # NOTE(review): no element handled here ever sets in_paragraph; it
        # is kept so that code reading the attribute keeps working.
        self.in_paragraph = 0
        self.in_verse = 0
        self.in_word = 0
        self.in_suffix = 0

    def startElement(self, name, attrs):
        """Mark the flag for ``name`` as active; unknown elements are ignored."""
        flag = self._ELEMENT_FLAGS.get(name)
        if flag is not None:
            setattr(self, flag, 1)

    def characters(self, data):
        """Forward ``data`` to the parent while inside a tracked element."""
        if self.in_word or self.in_suffix:
            # Here handle tokens
            self.parent.add_token(data)
        elif self.in_book or self.in_book_title or self.in_verse:
            # Here handle structs
            self.parent.add_token(data)

    def endElement(self, name):
        """Clear only the flag belonging to the element that just closed.

        Resetting every flag here would drop the state of enclosing
        elements (for example ``in_book``) as soon as any nested element
        ended, so only the flag for ``name`` is cleared.
        """
        flag = self._ELEMENT_FLAGS.get(name)
        if flag is not None:
            setattr(self, flag, 0)
class Command(object):
    """Drive a SAX parse of the SBL GNT XML file and print what it yields.

    The command owns an ``SBLGNTParser`` content handler (which calls back
    into ``add_token``) and the SAX parser the handler is attached to.
    """

    # Input used when ``handle`` is called without an explicit source.
    DEFAULT_SOURCE = "/Users/sminatha/Downloads/SBLGNTxml/sblgnt.xml"

    def __init__(self):
        """Build the content handler and wire it to a new SAX parser."""
        self.test = 1
        self.handler = SBLGNTParser(self)
        self.parser = xml.sax.make_parser()
        self.parser.setContentHandler(self.handler)

    def handle(self, source=None):
        """Parse ``source``, or ``DEFAULT_SOURCE`` when none is given.

        ``source`` is passed straight to the SAX parser's ``parse`` method.
        """
        if source is None:
            source = self.DEFAULT_SOURCE
        self.parser.parse(source)

    def add_token(self, data):
        """Print one piece of character data reported by the handler."""
        # The call form works on Python 3 and, for a single argument,
        # prints exactly what the Python 2 print statement printed.
        print(data)
# Only run the import when executed as a script, not when imported.
if __name__ == "__main__":
    Command().handle()
@westonruter
Copy link

Coolness.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment