Skip to content

Instantly share code, notes, and snippets.

@mozbugbox
Last active December 30, 2019 14:07
Show Gist options
  • Save mozbugbox/b73f1c10074ed9ba7446993d6b78e55b to your computer and use it in GitHub Desktop.
Save mozbugbox/b73f1c10074ed9ba7446993d6b78e55b to your computer and use it in GitHub Desktop.
中文数字转阿拉伯数字 Convert Chinese numerals into Arabic-numeral integers.
#!/usr/bin/python3
# vim:fileencoding=utf-8:sw=4:et
# 参考 <https://blog.csdn.net/grllery/article/details/89430363>
# Digit characters (everyday, financial and variant forms) mapped to values.
_ZH_DIGITS = {
    '〇': 0, '一': 1, '二': 2, '三': 3, '四': 4,
    '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
    '零': 0, '壹': 1, '贰': 2, '叁': 3, '肆': 4,
    '伍': 5, '陆': 6, '柒': 7, '捌': 8, '玖': 9,
    '貮': 2, '两': 2,
}

# Multiplier characters mapped to values (兆 is taken as 10**12).
_ZH_UNITS = {
    '十': 10, '拾': 10,
    '百': 100, '佰': 100,
    '千': 1000, '仟': 1000,
    '万': 10000, '萬': 10000,
    '亿': 100000000, '億': 100000000,
    '兆': 1000000000000,
}

# 万 / 亿 / 兆 scale a whole section of digits ("unit of unit"), unlike
# 十 / 百 / 千 which scale a single digit.
_ZH_SECTION_UNITS = frozenset((10**4, 10**8, 10**12))


def numzh2int(txt):
    """Convert a Chinese numeral string to an integer.

    The text is scanned from its last character to its first. Each
    multiplier character sets the weight applied to the next digit
    encountered; each digit is added at the current weight.

    Supported shorthands:
        * leading multiplier with an implied "one": 十一 -> 11,
          万八千 -> 18000
        * trailing digit implying the next-lower place: 二百五 -> 250,
          两万五 -> 25000

    Digits that are not followed by a multiplier all count at the ones
    place, so positional strings such as 二〇一九 are NOT read as 2019.

    Args:
        txt: String made only of the digit and multiplier characters
            listed in the module-level tables.

    Returns:
        The integer value; 0 for an empty string.

    Raises:
        KeyError: If ``txt`` contains any other character.
    """
    res = 0
    unit_c = 1           # weight for the next digit
    unit_of_unit_c = 1   # largest 万/亿/兆 seen so far
    section_c = 1        # multiplier of the section being read

    for i, c in enumerate(reversed(txt)):
        if c in _ZH_DIGITS:
            res += _ZH_DIGITS[c] * unit_c
            unit_c = 1
            continue

        num = _ZH_UNITS[c]  # KeyError here means an unsupported character
        if num in _ZH_SECTION_UNITS:
            if num > unit_of_unit_c:
                # A new, larger big unit starts a fresh section.
                unit_of_unit_c = num
                section_c = num
            else:
                # A smaller big unit after a larger one compounds with it,
                # e.g. 万 before 亿 means 10**12.
                section_c = num * unit_of_unit_c
            unit_c = section_c
        else:
            # Scale by the whole section multiplier, not just the largest
            # big unit, so that 十万亿 is 10 * 10**12 rather than 10 * 10**8.
            unit_c = num * section_c

        if i == 1:
            # Shorthand such as 二百五: the trailing digit already added
            # to res stands for one place below this multiplier.
            res = res * unit_c // 10

    # A leading multiplier with no digit in front (十一, 万八千) implies
    # "one" of that multiplier.
    if unit_c > 1:
        res += unit_c
    return res
def main():
    """Run numzh2int over sample numerals and print one line per sample.

    Each printed line holds whether the result equals the expected value,
    the numeral text, and the result with thousands separators.
    """
    # (numeral text, expected integer) pairs, from simple to deeply nested.
    cases = (
        ("八", 8),
        ("十", 10),
        ("十一", 11),
        ("五十", 50),
        ("五百", 500),
        ("三千", 3_000),
        ("一万", 10_000),
        ("十万", 100_000),
        ("百万", 1_000_000),
        ("三十二", 32),
        ("二百五", 250),
        ("两万五", 25_000),
        ("万八千", 18_000),
        ("一百万", 1_000_000),
        ("三千万", 30_000_000),
        ("四万亿", 4_000_000_000_000),
        ("五百零三", 503),
        ("五百一十", 510),
        ("一百二十三", 123),
        ("一千二百零三", 1_203),
        ("一万一千一百", 11_100),
        ("一万一千一百零一", 11_101),
        ("一万一千一百一十", 11_110),
        ("十万三千六百零九", 103_609),
        ("十万零三千六百零九", 103_609),
        ("一百二十三万四千五百六十七", 1_234_567),
        ("一千一百二十三万四千五百六十七", 11_234_567),
        ("一亿一千一百二十三万四千五百六十七", 111_234_567),
        ("一百零二亿五千零一万一千零三十八", 10_250_011_038),
        ("一万三千零三十二亿五千四百零一万一千零三十八", 1_303_254_011_038),
        ("一兆三千零三十二亿五千四百零一万一千零三十八", 1_303_254_011_038),
        ("一千兆三千零三十二亿五千四百零一万一千零三十八", 1_000_303_254_011_038),
        ("一万兆三千零三十二亿五千四百零一万一千零三十八", 10_000_303_254_011_038),
    )

    for item, val in cases:
        r = numzh2int(item)
        print(f'{r==val} {item}, {r:,d}')


# Run the sample checks only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment