-
-
Save VictoriousRaptor/9ad75a204a5eed56a2dd7c276f89e9a7 to your computer and use it in GitHub Desktop.
修正微软拼音输入法无法添加多个格式化自定义短语的问题,添加 sj 和 rq 两个自定义短语
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Fix the Microsoft Pinyin IME issue where more than one formatted custom phrase cannot be added.
Author: Scruel Tao | |
""" | |
import os | |
import re | |
# CUSTOM: define the custom phrases below, one per line, in the form
# <pinyin position phrase>. The phrase itself may safely contain spaces.
PHRASE_ITEMS = """
rq 1 %yyyy%-%MM%-%dd%
sj 1 %yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
""".strip()

# Text stored in the lexicon file is encoded with this codec.
PADDED_ENCODING = 'utf-16le'

# File signature: the ASCII tag followed by four code units in PADDED_ENCODING.
HEADER_BYTES = b'mschxudp' + '\x02\x60\x01\x00'.encode(PADDED_ENCODING)

# Byte positions of the 4-byte little-endian bookkeeping fields that the
# script rewrites after the phrase list changes.
HEADER_LEN = 16 + 4
PHRASE_64PCNT_POS = HEADER_LEN      # receives 64 + 4 * phrase count
TOTAL_BYTES_POS = HEADER_LEN + 4    # receives the total size computed on write
PHRASE_CNT_POS = HEADER_LEN + 8     # receives the number of phrases

# Position from which the per-phrase offset entries are read and written.
PHRASE_LEN_FIRST_POS = PHRASE_CNT_POS + 40

# Terminator written after the pinyin and after the phrase of every entry.
PHRASE_SEPARATOR_BYTES = b'\x00\x00'
PHRASE_SEPARATOR_SIZE = len(PHRASE_SEPARATOR_BYTES)
def read_bytes(position: int, length: int = 1) -> bytes:
    """Read bytes from the lexicon file.

    Args:
        position: Absolute byte offset in ``lex_file`` to start reading at.
        length: Number of bytes to read; a negative value reads to the end
            of the file.

    Returns:
        The bytes read, which may be shorter than ``length`` (or empty) when
        the end of the file is reached.
    """
    # Read-only mode is enough here; the previous 'rb+' mode needlessly
    # required write permission for a pure read.
    with open(lex_file, 'rb') as file:
        file.seek(position)
        return file.read(length)
def replace_bytes(position: int, value: bytes) -> None:
    """Overwrite bytes of the lexicon file in place.

    Args:
        position: Absolute byte offset in ``lex_file`` where writing starts.
        value: Bytes written over the existing content; the file grows if
            ``value`` extends past the current end.
    """
    # Writing in 'rb+' mode already overwrites in place and leaves the rest
    # of the file untouched, so there is no need to read the tail of the
    # file and write it back as the previous implementation did.
    with open(lex_file, 'rb+') as file:
        file.seek(position)
        file.write(value)
def bytes2int(data: bytes) -> int:
    """Decode ``data`` as an unsigned little-endian integer."""
    value = int.from_bytes(data, 'little')
    return value
def int2bytes(data: int, length: int = 1) -> bytes:
    """Encode ``data`` as ``length`` little-endian bytes.

    Raises:
        OverflowError: If ``data`` does not fit into ``length`` bytes.
    """
    encoded = int.to_bytes(data, length, 'little')
    return encoded
def padded_bytes(s: str) -> bytes:
    """Encode ``s`` with ``PADDED_ENCODING`` (UTF-16LE).

    The previous implementation encoded one character at a time and padded
    any single-byte result with a NUL byte. UTF-16LE always produces two or
    four bytes per character, so that padding branch could never run; a
    single whole-string encode yields the same bytes.

    Args:
        s: Text to encode.

    Returns:
        The encoded bytes (empty for an empty string).
    """
    return s.encode(PADDED_ENCODING)
def get_phrase_header(header_pinyin_len, index):
    """Assemble the header bytes that precede one phrase entry.

    Args:
        header_pinyin_len: Value stored as a 2-byte little-endian integer
            right after the fixed 4-byte prefix. The caller in this script
            passes 16 + encoded pinyin length + separator size.
        index: Candidate position of the phrase, stored in a single byte.

    Returns:
        The fixed prefix, the two encoded fields, constant filler bytes and
        the current module-level ``phrase_fixed_last_bytes``, concatenated.
    """
    fields = (
        b'\x10\x00\x10\x00',
        int2bytes(header_pinyin_len, 2),
        int2bytes(index),
        b'\x06\x00\x00\x00\x00',
        b'\x00\x00',
        phrase_fixed_last_bytes,
    )
    return b''.join(fields)
# Startup banner, printed before any file is touched.
_banner = (
    "==================\n"
    "Author: Scruel Tao\n"
    "==================\n\n"
    "正在修正巨硬拼音并添加\n"
    "预置的日期格式化短语……\n"
)
print(_banner)

# Location of the per-user phrase lexicon, resolved under %APPDATA%.
_appdata_dir = os.getenv('APPDATA')
lex_file = os.path.join(
    _appdata_dir, r'Microsoft\InputMethod\Chs\ChsPinyinEUDPv1.lex')

# Collected entries as (is_new, pinyin, header, phrase) tuples.
phrase_list = []
# Running offset of the previously read entry while scanning the file.
last_phrase_pos = 0
# Last two header bytes used for new entries; replaced by the value found in
# existing entries when the file is read.
phrase_fixed_last_bytes = b'\xA5\x2C'
if not os.path.exists(lex_file):
    # No lexicon yet: create an empty one. It consists of the signature,
    # three 4-byte fields initialised to 0x40, a zero phrase count, a fixed
    # 4-byte value and 32 zero bytes; the write phase fills in the rest.
    with open(lex_file, 'wb') as f:
        f.write(HEADER_BYTES)
        f.write((b'\x40' + b'\x00' * 3) * 3)
        f.write(b'\x00' * 4)
        f.write(b'\x38\xd2\xa3\x65')
        f.write(b'\x00' * 32)
else:
    phrase_cnt = bytes2int(read_bytes(PHRASE_CNT_POS, 4))
    # Entry data begins after the offset table, which holds one 4-byte
    # offset for every entry except the first.
    phrase_block_first_pos = PHRASE_LEN_FIRST_POS + 4 * (phrase_cnt - 1)
    # DOTALL is required: '.' must also match byte 0x0A, which occurs in
    # UTF-16LE text (e.g. U+4E0A encodes as 0A 4E). Without it such entries
    # do not match and the script crashed on ``None.groups()``.
    entry_pattern = re.compile(
        (b'(.+)' + PHRASE_SEPARATOR_BYTES) * 2, re.DOTALL)
    # Read existing phrases
    for i in range(phrase_cnt):
        if i == phrase_cnt - 1:
            # The last entry has no following offset; a length of -1 makes
            # read_bytes read through to the end of the file.
            phrase_block_pos = phrase_block_len = -1
        else:
            phrase_block_pos = bytes2int(
                read_bytes(PHRASE_LEN_FIRST_POS + i * 4, 4))
            phrase_block_len = phrase_block_pos - last_phrase_pos
        phrase_block_bytes = read_bytes(
            phrase_block_first_pos + last_phrase_pos, phrase_block_len)
        last_phrase_pos = phrase_block_pos
        # After the 16-byte header come the pinyin and the phrase, each
        # followed by the separator.
        entry_match = entry_pattern.match(phrase_block_bytes[16:])
        if entry_match is None:
            # Nothing has been written yet at this point, so failing here
            # leaves the user's lexicon untouched.
            raise ValueError(
                f'Malformed phrase entry #{i} in {lex_file!r}')
        pinyin_bytes, phrase_bytes = entry_match.groups()
        # Reuse the trailing header bytes of existing entries for new ones.
        phrase_fixed_last_bytes = phrase_block_bytes[14:16]
        # Keep only entries whose header byte 9 is zero; per the original
        # author's note, other values mark deleted phrases.
        if phrase_block_bytes[9:10] == b'\x00':
            phrase_list.append((0, pinyin_bytes,
                                phrase_block_bytes[:16], phrase_bytes))
# Merge the configured custom phrases into the list read from the file.
for item in PHRASE_ITEMS.split('\n'):
    # Skip blank and whitespace-only lines; previously a line containing
    # only spaces crashed on tuple unpacking.
    if not item.strip():
        continue
    fields = item.split(maxsplit=2)
    if len(fields) != 3:
        raise ValueError(
            'Invalid PHRASE_ITEMS line, expected '
            f'"<pinyin> <position> <phrase>": {item!r}')
    pinyin, index, phrase = fields
    pinyin_bytes = padded_bytes(pinyin)
    phrase_bytes = padded_bytes(phrase)
    # Drop entries loaded from the file that use the same pinyin, so the
    # configured phrase replaces them; entries added in this run are kept.
    phrase_list = [x for x in phrase_list if x[0] or not x[1] == pinyin_bytes]
    # The header records 16 (header size) + pinyin bytes + separator size.
    header = get_phrase_header(
        16 + len(pinyin_bytes) + PHRASE_SEPARATOR_SIZE, int(index))
    phrase_list.append((1, pinyin_bytes, header, phrase_bytes))
# Necessary fix, otherwise the order of phrases will be messed up.
phrase_list.sort(key=lambda x: x[1])
# Write phrases: serialise every entry first, then emit the offset table
# followed by the entries themselves.
entry_blobs = [
    entry_header + entry_pinyin + PHRASE_SEPARATOR_BYTES
    + entry_phrase + PHRASE_SEPARATOR_BYTES
    for _, entry_pinyin, entry_header, entry_phrase in phrase_list
]

# The table stores the cumulative end offset of every entry except the last.
tolast_phrase_pos = 0
offset_entries = []
for blob in entry_blobs[:-1]:
    tolast_phrase_pos += len(blob)
    offset_entries.append(int2bytes(tolast_phrase_pos, length=4))

total_size = PHRASE_LEN_FIRST_POS
total_size += PHRASE_SEPARATOR_SIZE * 2 * len(offset_entries)
total_size += sum(len(blob) for blob in entry_blobs)

with open(lex_file, 'rb+') as file:
    # Discard the old table and entries, keeping everything before them.
    file.seek(PHRASE_LEN_FIRST_POS)
    file.truncate()
    file.write(b''.join(offset_entries))
    file.write(b''.join(entry_blobs))
# Fix file header: refresh the three 4-byte bookkeeping fields so they
# describe the phrase list that was just written.
_entry_count = len(phrase_list)
for _field_pos, _field_value in (
    (PHRASE_64PCNT_POS, 64 + _entry_count * 4),
    (PHRASE_CNT_POS, _entry_count),
    (TOTAL_BYTES_POS, total_size),
):
    replace_bytes(_field_pos, int2bytes(_field_value, length=4))

print('Done')
# Keep the console window open until the user presses a key (Windows).
os.system('pause')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment