Skip to content

Instantly share code, notes, and snippets.

@InNoobWeTrust
Last active September 21, 2019 18:18
Show Gist options
  • Save InNoobWeTrust/8c6ab76e9dae9978264ab202f852daee to your computer and use it in GitHub Desktop.
Save InNoobWeTrust/8c6ab76e9dae9978264ab202f852daee to your computer and use it in GitHub Desktop.
(WIP) StarDict converter. Pseudo code, meant to support making a PWA Japanese dictionary. Progress depends on laziness and the desire to procrastinate...
#!/usr/bin/env python3
# coding: utf-8
import json
import struct
# Base names of the dictionaries to convert; the script opens
# "<name>.ifo", "<name>.idx" and "<name>.dict" for each of them.
DICT_NAMES = ["star_nhatviet", "star_vietnhat"]

# Metadata keys pre-seeded (as empty strings) in every output record.
_IFO_FIELDS = (
    "bookname",
    "wordcount",
    "synwordcount",
    "idxfilesize",
    "idxoffsetbits",
    "author",
    "email",
    "website",
    "description",
    "date",
    "sametypesequence",
    "dicttype",
)

# One output record per dictionary: blank metadata followed by an (initially
# empty) "data" list. Each record owns its own list object.
DICT_JSON = {
    name: {**dict.fromkeys(_IFO_FIELDS, ""), "data": []}
    for name in DICT_NAMES
}
def _iter_index_entries(raw, offset_bits=32):
    """Yield ``(word, start, length)`` for each record of a ``.idx`` blob.

    Every record is a NUL-terminated UTF-8 word followed by two big-endian
    unsigned integers: the offset of the definition in the ``.dict`` file and
    its length in bytes. The length is always 32 bits wide; the offset is
    32 bits wide unless ``offset_bits`` is 64 (StarDict ``idxoffsetbits=64``).

    Args:
        raw: Full content of the ``.idx`` file as ``bytes``.
        offset_bits: Width of the offset field, 32 (default) or 64.

    Raises:
        ValueError: If the blob ends in the middle of a record.
    """
    tail = struct.Struct("!QI" if offset_bits == 64 else "!II")
    total = len(raw)
    pos = 0
    while pos < total:
        end = raw.find(b"\0", pos)
        # A missing terminator or a short numeric tail means the file is
        # truncated; fail loudly instead of spinning on an exhausted stream.
        if end == -1 or end + 1 + tail.size > total:
            raise ValueError(f"truncated index record at byte {pos}")
        word = raw[pos:end].decode("utf-8", errors="ignore")
        start, length = tail.unpack_from(raw, end + 1)
        yield word, start, length
        pos = end + 1 + tail.size


# Convert every dictionary listed in DICT_NAMES: read the metadata from
# "<name>.ifo", walk "<name>.idx" to locate each definition inside
# "<name>.dict", and dump the collected record to "<name>.json".
for name in DICT_NAMES:
    print(f"Processing dictionary: {name}...")
    info = DICT_JSON[name]

    # The .ifo file is a list of "key=value" lines; lines without "=" (such
    # as the leading header line) are skipped.
    with open(f"{name}.ifo", "rt", encoding="utf-8", newline=None) as finfo:
        for line in finfo:
            if "=" in line:
                k, v = line.split("=", 1)
                info[k.strip()] = v.strip()
    print(f"{name}: {info}")

    offset_bits = 64 if info.get("idxoffsetbits") == "64" else 32

    # Load the whole index at once rather than reading it byte by byte.
    with open(f"{name}.idx", "rb") as fidx:
        raw_index = fidx.read()

    entries = info["data"]
    with open(f"{name}.dict", "rb") as fdict:
        try:
            for i, (word, start, length) in enumerate(
                _iter_index_entries(raw_index, offset_bits), 1
            ):
                fdict.seek(start)
                definition = fdict.read(length).decode("utf-8",
                                                       errors="ignore")
                entries.append({
                    "word": word,
                    "definition": definition
                })
                print(f"\r{i} word(s) processed so far...", end="")
        except ValueError as err:
            # Still exits with status 1 on a corrupt index, but now says why.
            raise SystemExit(f"\n{name}.idx: {err}") from err

    print(f"\nDumping to {name}.json...")
    with open(f"{name}.json", "w", encoding="utf-8", newline="\n") as fjson:
        json.dump(info, fjson)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment