Skip to content

Instantly share code, notes, and snippets.

@ljjjustin
Last active December 15, 2020 08:26
Show Gist options
  • Save ljjjustin/5eac066ea24582b4b55ef4f60e3fe664 to your computer and use it in GitHub Desktop.
Save ljjjustin/5eac066ea24582b4b55ef4f60e3fe664 to your computer and use it in GitHub Desktop.
Convert a sqlite3-format Bible to Zefania XML format.
#!/usr/bin/env python
import os
import re
import sys
import sqlite3
import zhconv
# Lookup table from Traditional Chinese book names to their English names,
# listed in canonical order. In the visible code its only use is a
# commented-out lookup, so it currently serves as reference data.
book_name_map = {
    # --- Old Testament ---
    "創世記": "Genesis",
    "出埃及記": "Exodus",
    "利未記": "Leviticus",
    "民數記": "Numbers",
    "申命記": "Deuteronomy",
    "約書亞記": "Joshua",
    "士師記": "Judges",
    "路得記": "Ruth",
    "撒母耳記上": "1 Samuel",
    "撒母耳記下": "2 Samuel",
    "列王紀上": "1 Kings",
    "列王紀下": "2 Kings",
    "歷代志上": "1 Chronicles",
    "歷代志下": "2 Chronicles",
    "以斯拉記": "Ezra",
    "尼希米記": "Nehemiah",
    "以斯帖記": "Esther",
    "約伯記": "Job",
    "詩篇": "Psalm",
    "箴言": "Proverbs",
    "傳道書": "Ecclesiastes",
    "雅歌": "Song of Solomon",
    "以賽亞書": "Isaiah",
    "耶利米書": "Jeremiah",
    "耶利米哀歌": "Lamentations",
    "以西結書": "Ezekiel",
    "但以理書": "Daniel",
    "何西阿書": "Hosea",
    "約珥書": "Joel",
    "阿摩司書": "Amos",
    "俄巴底亞書": "Obadiah",
    "約拿書": "Jonah",
    "彌迦書": "Micah",
    "那鴻書": "Nahum",
    "哈巴谷書": "Habakkuk",
    "西番雅書": "Zephaniah",
    "哈該書": "Haggai",
    "撒迦利亞書": "Zechariah",
    "瑪拉基書": "Malachi",
    # --- New Testament ---
    "馬太福音": "Matthew",
    "馬可福音": "Mark",
    "路加福音": "Luke",
    "約翰福音": "John",
    "使徒行傳": "Acts",
    "羅馬書": "Romans",
    "哥林多前書": "1 Corinthians",
    "哥林多後書": "2 Corinthians",
    "加拉太書": "Galatians",
    "以弗所書": "Ephesians",
    "腓立比書": "Philippians",
    "歌羅西書": "Colossians",
    "帖撒羅尼迦前書": "1 Thessalonians",
    "帖撒羅尼迦後書": "2 Thessalonians",
    "提摩太前書": "1 Timothy",
    "提摩太後書": "2 Timothy",
    "彼得前書": "1 Peter",
    "彼得後書": "2 Peter",
    "約翰壹書": "1 John",
    "約翰貳書": "2 John",
    "約翰參書": "3 John",
    "猶大書": "Jude",
    "雅各書": "James",
    "提多書": "Titus",
    "腓利門書": "Philemon",
    "希伯來書": "Hebrews",
    "啟示錄": "Revelation",
}
# Opening of the output document: the XML declaration followed by the root
# <XMLBIBLE> element (each on its own line).
ZEFANIA_HEADER = (
    '<?xml version="1.0"?>\n'
    '<XMLBIBLE biblename="Chinese Contemporary Bible, 2012">\n'
)
# Closing tag of the root element; written last, with no trailing newline.
ZEFANIA_FOOTER = '</XMLBIBLE>'

# Leading whitespace for each nesting level: book (4 spaces), chapter (8)
# and verse (12).
BOOK_INDENT = ' ' * 4
CHAP_INDENT = BOOK_INDENT * 2
VERSE_INDENT = BOOK_INDENT * 3
def usage():
    """Print the command-line synopsis to stderr and terminate the program.

    Raises:
        SystemExit: always, with exit status -1.
    """
    # The synopsis is only shown on a bad invocation, so it goes to stderr
    # and never mixes with output the caller may be redirecting.
    print("usage: %s <source sqlite3 file> <output xml filename>" % sys.argv[0],
          file=sys.stderr)
    sys.exit(-1)
def _open_bible_db(sqlite_file):
    """Open *sqlite_file* and check that it has `books` and `verses` tables.

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for closing it.

    Raises:
        sqlite3.Error: if the file cannot be opened as a database or either
            table cannot be queried. The connection is closed before re-raising.
    """
    db = sqlite3.connect(sqlite_file)
    try:
        # Cheap probes: they fail immediately if a required table is missing.
        db.execute('select count(*) from books')
        db.execute('select count(*) from verses')
    except sqlite3.Error:
        db.close()
        raise
    return db


def _write_zefania(db, out):
    """Write every book, chapter and verse found in *db* to *out* as Zefania XML.

    Books, chapters and verses are emitted in ascending order of their ids.
    Markup-like tags are stripped from verse text, and all names and text are
    converted to Simplified Chinese with ``zhconv`` and XML-escaped.

    Args:
        db: open ``sqlite3.Connection`` with `books` and `verses` tables.
        out: writable text file object receiving the XML document.
    """
    # Local import: only this writer needs the XML escaping helper.
    from xml.sax.saxutils import escape

    def attr(value):
        # Attribute values are wrapped in double quotes, so escape those too.
        return escape(str(value), {'"': '&quot;'})

    tag_pattern = re.compile('</*.*?>')

    out.write(ZEFANIA_HEADER)
    book_ids = sorted({row[0] for row in db.execute('select book_number from books')})
    # bnumber is the 1-based position in sorted order, not the source book_number.
    for bnumber, book_id in enumerate(book_ids, start=1):
        book_info = db.execute(
            'select long_name, short_name from books where book_number=?',
            (book_id,)).fetchone()
        long_name = zhconv.convert(book_info[0], 'zh-hans')
        short_name = zhconv.convert(book_info[1], 'zh-hans')
        out.write(BOOK_INDENT + '<BIBLEBOOK bnumber="%d" bname="%s" bsname="%s">\n'
                  % (bnumber, attr(long_name), attr(short_name)))

        chapter_ids = sorted({row[0] for row in db.execute(
            'select chapter from verses where book_number=?', (book_id,))})
        for chap_id in chapter_ids:
            out.write(CHAP_INDENT + '<CHAPTER cnumber="%s">\n' % attr(chap_id))
            rows = db.execute(
                'select * from verses where book_number=? and chapter=?',
                (book_id, chap_id)).fetchall()
            # Positional access: column 2 is used as the verse number and
            # column 3 as the verse text (assumes that column order in
            # `verses`; TODO confirm). If a verse number repeats, the last
            # row wins.
            text_by_verse = {row[2]: row[3] for row in rows}
            for verse_id in sorted(text_by_verse):
                out.write(VERSE_INDENT + '<VERS vnumber="%s">' % attr(verse_id))
                verse_text = text_by_verse[verse_id]
                if verse_text is not None:
                    plain_text = tag_pattern.sub('', verse_text)
                    out.write(escape(zhconv.convert(plain_text, 'zh-hans')))
                out.write("</VERS>\n")
            out.write(CHAP_INDENT + '</CHAPTER>\n')
        out.write(BOOK_INDENT + '</BIBLEBOOK>\n')
    out.write(ZEFANIA_FOOTER)


if __name__ == '__main__':
    if len(sys.argv) != 3:
        usage()
    sqlite_file = sys.argv[1]
    # Check up front: sqlite3.connect() would silently create a missing file.
    if not os.path.exists(sqlite_file):
        print('source sqlite3 file: %s do NOT exists.' % sqlite_file, file=sys.stderr)
        usage()
    output_xml = sys.argv[2]
    try:
        db = _open_bible_db(sqlite_file)
    except sqlite3.Error as e:
        print(e, file=sys.stderr)
        sys.exit(-1)
    try:
        # Explicit UTF-8 so Chinese text is written correctly whatever the locale.
        with open(output_xml, 'w', encoding='utf-8') as f:
            _write_zefania(db, f)
    finally:
        db.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment