Skip to content

Instantly share code, notes, and snippets.

@glacjay
Created January 27, 2021 08:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save glacjay/c3cfe25784b3bf8f06b99d92f6c22c87 to your computer and use it in GitHub Desktop.
Save glacjay/c3cfe25784b3bf8f06b99d92f6c22c87 to your computer and use it in GitHub Desktop.
制作 rime 五笔86 单字词库
# forked from https://github.com/yekingyan/rime-wubi-86-single/blob/master/wubi86_single.py
import re
# Path of the dictionary file that get_trim_line() opens for reading.
INPUT_FILE_PATH = "wubi86.dict.yaml"
# Path of the file that main() writes the resulting lines to.
OUTPUT_FILE_PATH = "wubi86.dict.yaml.single"
def line_key(line):
    """Return the sort key ``(code, -weight)`` for one dictionary entry line.

    ``line`` is a tab-separated entry: the second column is the code and the
    optional third column is a base-10 integer weight.  The weight is negated
    so that an ascending sort puts heavier entries first within one code.

    A missing, empty or whitespace-only weight column counts as 0.

    Raises:
        IndexError: if the line has no code column.
        ValueError: if the weight column holds something that is not a
            base-10 integer.
    """
    # Drop the line terminator first so it never ends up inside the code
    # when the entry has no weight column.
    fields = line.rstrip("\n").split("\t")
    code = fields[1]
    weight_text = fields[2].strip() if len(fields) > 2 else ""
    # int(0, 10) raises TypeError, so only parse when there is text to parse.
    weight = int(weight_text, 10) if weight_text else 0
    return (code, -weight)
def get_trim_line(input_path=None):
    """Return the dictionary lines reduced to single-character entries.

    The file is split at the first line consisting of ``...``.  Everything up
    to and including that line, plus any blank lines directly after it, is
    returned unchanged as the header.  Of the tab-separated entry lines that
    follow, only those whose word (first column) is one character long, or
    whose code (second column) starts with ``z``, are kept; lines whose first
    column starts with ``#`` are dropped.

    Kept entries are ordered by code length, then by code, then by weight
    (third column) descending; ties keep their order from the file.

    Lines after the marker that have no code column (for example a comment
    without a tab) and blank lines between entries are dropped, because they
    cannot be placed in a list ordered by code.

    Args:
        input_path: File to read.  Defaults to ``INPUT_FILE_PATH``.

    Returns:
        A list of lines, each ending in a newline: header first, then the
        sorted entries.

    Raises:
        ValueError: if the file contains no ``...`` marker line.
    """
    if input_path is None:
        input_path = INPUT_FILE_PATH

    header_lines = []
    entry_lines = []
    marker_seen = False
    body_started = False
    with open(input_path, encoding="utf8") as f:
        for line in f:
            if not marker_seen:
                header_lines.append(line)
                marker_seen = line.rstrip() == "..."
                continue

            if not line.strip():
                # Blank lines right after the marker belong to the header;
                # later ones would lose their position when sorting.
                if not body_started:
                    header_lines.append(line)
                continue
            body_started = True

            fields = line.rstrip("\n").split("\t")
            if len(fields) < 2:
                continue
            word, code = fields[0], fields[1]
            if word.startswith("#"):
                continue
            if len(word) == 1 or code.startswith("z"):
                # A final line without a terminator must not be glued to
                # whatever entry ends up after it once sorted.
                if not line.endswith("\n"):
                    line += "\n"
                entry_lines.append(line)

    if not marker_seen:
        raise ValueError("no '...' marker line found in %s" % input_path)

    entry_lines.sort(key=_entry_sort_key)
    return header_lines + entry_lines


def _entry_sort_key(line):
    """Return ``(len(code), code, -weight)`` for one kept entry line."""
    fields = line.rstrip("\n").split("\t")
    code = fields[1]
    try:
        weight = float(fields[2]) if len(fields) > 2 else 0.0
    except ValueError:
        # Empty or non-numeric weight column: rank it like a missing weight.
        weight = 0.0
    return (len(code), code, -weight)
def main():
    """Write the lines returned by get_trim_line() to ``OUTPUT_FILE_PATH``.

    The output file is created or truncated and written as UTF-8.
    """
    stay_line = get_trim_line()
    # Write-only mode is enough, and writelines() takes the whole list;
    # calling it once per line would iterate each string character by
    # character.
    with open(OUTPUT_FILE_PATH, "w", encoding="utf8") as f:
        f.writelines(stay_line)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment