Skip to content

Instantly share code, notes, and snippets.

@etsune
Created May 22, 2022 06:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save etsune/a5d67620dc5bf6f0b96982226b2c9559 to your computer and use it in GitHub Desktop.
Save etsune/a5d67620dc5bf6f0b96982226b2c9559 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import re, json, pathlib, sys
from time import time
# Default path of the input dictionary text file; the script section further
# down overwrites it with the first command-line argument.
dictFile = '西夏(俗).txt'
# Default directory that receives the generated JSON files; overwritten with
# the second command-line argument and read by the writer functions below.
outputDirectory = 'yomi'
def writeDictionaryToFile(data, count):
    """Dump one batch of entries to ``term_bank_<count>.json``.

    The file is written inside the module-level ``outputDirectory``; that
    directory (and any missing parents) is created first if needed.

    Args:
        data: JSON-serializable object to store.
        count: Number embedded in the output file name.
    """
    targetDir = pathlib.Path(outputDirectory)
    targetDir.mkdir(parents=True, exist_ok=True)
    targetPath = outputDirectory + f'/term_bank_{count}.json'
    with open(targetPath, 'w') as bankFile:
        json.dump(data, bankFile)
def createIndexFile(name):
    """Write ``index.json`` into the module-level ``outputDirectory``.

    The index holds the dictionary title, format number 3, a revision string
    taken from the current ``time()`` value, and the ``sequenced`` flag.

    Args:
        name: Value stored under the ``"title"`` key.
    """
    index = {}
    index["title"] = name
    index["format"] = 3
    # The revision is simply the current timestamp rendered as text.
    index["revision"] = f'{time()}'
    index["sequenced"] = True

    pathlib.Path(outputDirectory).mkdir(parents=True, exist_ok=True)
    indexPath = outputDirectory + '/index.json'
    with open(indexPath, 'w') as indexFile:
        json.dump(index, indexFile)
def cleanBBTags(line):
    """Return ``line`` with known bracket tags removed and brackets unescaped.

    Tags removed (opening and ``/``-prefixed closing forms): ``[*]``, ``[c]``,
    ``[e]``, ``[b]``, ``[i]``, ``[p]``, ``[m]`` with optional digits,
    ``[ex]`` and ``[c <lowercase word>]``. Any other bracketed text is kept.
    Afterwards the escaped brackets ``\\[`` and ``\\]`` become plain ones.

    Args:
        line: Text to clean.

    Returns:
        The cleaned text.
    """
    tagPattern = re.compile(r'\[\/?([\*cebip]|m\d*|ex|c\s[a-z]+)\]')
    cleaned = tagPattern.sub('', line)
    # Unescape only after tag removal so escaped brackets are never
    # mistaken for markup.
    for escaped, plain in (("\\[", "["), ("\\]", "]")):
        cleaned = cleaned.replace(escaped, plain)
    return cleaned
class Entry(object):
    """A single dictionary entry that can be exported as a Yomichan term row.

    All fields are class-level defaults; callers overwrite the ones they need
    on the instance (the script below sets ``expression`` and ``glossary``).
    """

    # Text fields all start out empty.
    expression = reading = definitionTags = rules = glossary = termTags = ''
    # Numeric fields shared by every entry unless overridden on the instance.
    score = 1
    sequence = 1791660

    def exportToYomichanArray(self):
        """Return the entry as an eight-element list.

        Order: expression, reading, definition tags, rules, score, glossary
        (wrapped in a one-element list), sequence, term tags. Expression,
        reading and glossary are stripped of surrounding whitespace.
        """
        headword = self.expression.strip()
        pronunciation = self.reading.strip()
        row = [headword, pronunciation, self.definitionTags, self.rules, self.score]
        row.append([self.glossary.strip()])
        row.extend((self.sequence, self.termTags))
        return row
# ---------------------------------------------------------------------------
# Script body: convert the dictionary text file into term-bank JSON files.
#
# Usage: <script> <dictionary text file> <output directory>
#
# Line handling (after stripping whitespace):
#   * empty lines and lines starting with '#' or '{' are ignored;
#   * a line starting with '[' is a body line and is appended to the glossary
#     of the current entry;
#   * any other line is a headword; if it follows body lines, the previous
#     entry is finished first.
# ---------------------------------------------------------------------------
if len(sys.argv) != 3:
    # Passing a string makes sys.exit print it to stderr and exit with a
    # non-zero status, so callers can detect the usage error.
    sys.exit("Incorrect number of arguments, should be 2.")

dictFile = sys.argv[1]
outputDirectory = sys.argv[2]
createIndexFile(dictFile)

# Maximum number of entries stored in a single term-bank file.
ENTRIES_PER_FILE = 10000

entriesCountTotal = 0
filesCount = 1
currentEntry = Entry()
yomichanDictionary = []
isBody = False

# 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM, which would
# otherwise stick to the first line. The with-block guarantees the file is
# closed even if processing fails.
with open(dictFile, mode='r', encoding='utf-8-sig') as dictionaryTxt:
    for line in dictionaryTxt:
        line = line.strip()
        if not line or line[0] == '#' or line[0] == '{':
            continue

        if line[0] == '[':
            isBody = True
        elif isBody:
            # First headword after a body: the previous entry is complete.
            yomichanDictionary.append(currentEntry.exportToYomichanArray())
            currentEntry = Entry()
            isBody = False

        line = cleanBBTags(line)
        if isBody:
            currentEntry.glossary += "\n" + line
        else:
            currentEntry.expression = line

        # Flush a full batch to its own numbered file.
        if len(yomichanDictionary) >= ENTRIES_PER_FILE:
            entriesCountTotal += len(yomichanDictionary)
            writeDictionaryToFile(yomichanDictionary, filesCount)
            filesCount += 1
            yomichanDictionary = []

# The last entry has no following headword to trigger its export. Only export
# it when it actually holds data, so an empty input yields no blank entry.
if currentEntry.expression or currentEntry.glossary:
    yomichanDictionary.append(currentEntry.exportToYomichanArray())

if yomichanDictionary:
    writeDictionaryToFile(yomichanDictionary, filesCount)
    entriesCountTotal += len(yomichanDictionary)

print("Total entries: %d" % entriesCountTotal)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment