Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@weaming
Created August 20, 2019 04:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save weaming/010d3d0fddcabcd7a3035273b1641f51 to your computer and use it in GitHub Desktop.
Save weaming/010d3d0fddcabcd7a3035273b1641f51 to your computer and use it in GitHub Desktop.
import re
from pypinyin import pinyin, Style
def _recursive(a_list, b_list=None):
# a: [x] or x
if not isinstance(a_list, (list, tuple)):
a_list = [a_list]
# b: [x] or None
b_list = b_list or []
# b: [x, y] or [[x, y], [a, b, c]]
if b_list and isinstance(b_list[0], (list, tuple)):
if len(b_list) > 1:
b_list = get_all_cp(*b_list)
else:
# avoid tail blank list collapse all output
b_list = b_list[0]
# b: [x] or []
if not b_list:
for a in a_list:
yield a
return
# a: [x] or []
if not a_list:
for b in b_list:
yield b
return
# a: [x]
# b: [y]
for a in a_list:
for b in b_list:
if isinstance(b, tuple):
yield (a, *b)
else:
yield a, b
def get_all_cp(*args):
    """Return every combination that takes one item from each argument.

    Args:
        *args: Option lists, one per position. The first argument may also
            be a single non-list value, which counts as one option.

    Returns:
        A list. With no arguments it is empty. With a single argument it
        holds that argument's items unchanged (not wrapped in tuples). With
        several arguments it holds flat tuples, ordered with the leftmost
        position varying slowest.
    """
    if not args:
        return []
    # The first argument seeds the combinations; the rest are handed over
    # as a sequence of option lists for _recursive to expand.
    return list(_recursive(args[0], args[1:]))
def to_pinyin_list(text):
    """Return the candidate pinyin readings of ``text``.

    Args:
        text: The string to convert. Falsy values (``''``, ``None``) and
            strings without any character in U+4E00..U+9FA5 are returned
            unchanged, wrapped in a one-element list.

    Returns:
        ``[text]`` when no conversion applies; otherwise the result of
        ``get_all_cp`` over the per-character reading lists produced by
        ``pypinyin.pinyin`` (heteronyms enabled, tone-less style).
    """
    if not text:
        return [text]

    # The character class used to be '[\u4e00 -\u9fa5]': the stray space
    # turned the range into U+0020..U+9FA5, so plain ASCII text matched and
    # was pushed through pinyin(). search() (rather than match()) keeps
    # converting mixed text that merely contains Chinese characters.
    zh = re.compile(r'[\u4e00-\u9fa5]')
    if not zh.search(text):
        return [text]

    results = pinyin(text, heteronym=True, style=Style.NORMAL, strict=False)
    # Trace of the raw per-character readings, kept from the original.
    print('=>', results)
    return get_all_cp(*results)
# Demo 1: all readings of the whole string at once.
print(to_pinyin_list("单白重"))

# Demo 2: convert the two parts separately, glue each part's syllables
# into a single string, then combine the parts.
first, last = "单", "白重"

first_pinyin_list = list(map("".join, to_pinyin_list(first)))
last_pinyin_list = list(map("".join, to_pinyin_list(last)))

first_last_pinyin_list = get_all_cp(first_pinyin_list, last_pinyin_list)
print(first_last_pinyin_list)
@weaming
Copy link
Author

weaming commented Aug 20, 2019

Output:

=> [['dan', 'chan', 'shan'], ['bai', 'bo'], ['zhong', 'chong', 'tong']]
[('dan', 'bai', 'zhong'), ('dan', 'bai', 'chong'), ('dan', 'bai', 'tong'), ('dan', 'bo', 'zhong'), ('dan', 'bo', 'chong'), ('dan', 'bo', 'tong'), ('chan', 'bai', 'zhong'), ('chan', 'bai', 'chong'), ('chan', 'bai', 'tong'), ('chan', 'bo', 'zhong'), ('chan', 'bo', 'chong'), ('chan', 'bo', 'tong'), ('shan', 'bai', 'zhong'), ('shan', 'bai', 'chong'), ('shan', 'bai', 'tong'), ('shan', 'bo', 'zhong'), ('shan', 'bo', 'chong'), ('shan', 'bo', 'tong')]


=> [['dan', 'chan', 'shan']]
=> [['bai', 'bo'], ['zhong', 'chong', 'tong']]
[('dan', 'baizhong'), ('dan', 'baichong'), ('dan', 'baitong'), ('dan', 'bozhong'), ('dan', 'bochong'), ('dan', 'botong'), ('chan', 'baizhong'), ('chan', 'baichong'), ('chan', 'baitong'), ('chan', 'bozhong'), ('chan', 'bochong'), ('chan', 'botong'), ('shan', 'baizhong'), ('shan', 'baichong'), ('shan', 'baitong'), ('shan', 'bozhong'), ('shan', 'bochong'), ('shan', 'botong')]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment