Instantly share code, notes, and snippets.

Embed
What would you like to do?
Numbers to Chinese representations converter in Python. 中文数字转换
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed under WTFPL or the Unlicense or CC0.
# This uses Python 3, but it's easy to port to Python 2 by changing
# strings to u'xx'.
import itertools
def num2chinese(num, big=False, simp=True, o=False, twoalt=False):
    """
    Convert a number to its Chinese numeral representation.

    `num`   : an int, float or numeric string; it is processed through
              `str(num)`, so an optional leading '+'/'-' and a single
              '.' are understood.
    `big`   : use financial characters.
    `simp`  : use simplified characters instead of traditional characters.
    `o`     : use 〇 for zero.
    `twoalt`: use 两/兩 for two when appropriate.
    Note that `o` and `twoalt` are ignored when `big` is used,
    and `twoalt` is ignored when `o` is used for formal representations.

    Returns the Chinese representation as a string.
    Raises ValueError when abs(num) >= 1e48 or when `str(num)` is in
    scientific notation.
    """
    # check num first
    nd = str(num)
    if abs(float(nd)) >= 1e48:
        raise ValueError('number out of range')
    if 'e' in nd.lower():
        raise ValueError('scientific notation is not supported')

    c_symbol = '正负点' if simp else '正負點'
    if big:
        c_basic = '零壹贰叁肆伍陆柒捌玖' if simp else '零壹貳參肆伍陸柒捌玖'
        c_unit1 = '拾佰仟'
        # Financial numerals have no alternative form for two.
        c_twoalt = c_basic[2]
    else:
        c_basic = '〇一二三四五六七八九' if o else '零一二三四五六七八九'
        c_unit1 = '十百千'
        if twoalt and not o:  # `o` selects the formal style: no 两/兩
            c_twoalt = '两' if simp else '兩'
        else:
            c_twoalt = c_basic[2]
    c_unit2 = '万亿兆京垓秭穰沟涧正载' if simp else '萬億兆京垓秭穰溝澗正載'
    zero = c_basic[0]

    def revuniq(parts):
        # Parts are collected least-significant first; join them in
        # reading order and drop consecutive duplicate parts.
        return ''.join(k for k, _ in itertools.groupby(reversed(parts)))

    result = []
    if nd[0] == '+':
        result.append(c_symbol[0])
    elif nd[0] == '-':
        result.append(c_symbol[1])
    # float(nd) succeeded above, so there is at most one '.'.
    integer, _, remainder = nd.lstrip('+-').partition('.')

    if int(integer):
        # Four-digit groups, least significant group first.
        groups = [integer[max(i - 4, 0):i]
                  for i in range(len(integer), 0, -4)]
        intresult = []
        for nu, unit in enumerate(groups):
            # special cases
            if int(unit) == 0:  # 0000
                intresult.append(zero)
                continue
            if nu > 0 and int(unit) == 2:  # 0002
                # The leading zero mirrors what the regular path emits
                # for a group below 1000; it is stripped again if this
                # turns out to be the most significant group.
                intresult.append(zero + c_twoalt + c_unit2[nu - 1])
                continue
            ulist = []
            unit = unit.zfill(4)
            for nc, ch in enumerate(reversed(unit)):
                if ch == '0':
                    if ulist:  # ???0
                        ulist.append(zero)
                elif nc == 0:
                    ulist.append(c_basic[int(ch)])
                elif nc == 1 and ch == '1' and unit[1] == '0':
                    # special case for tens
                    # edit the 'elif' if you don't like
                    # 十四, 三千零十四, 三千三百一十四
                    ulist.append(c_unit1[0])
                elif nc > 1 and ch == '2':
                    ulist.append(c_twoalt + c_unit1[nc - 1])
                else:
                    ulist.append(c_basic[int(ch)] + c_unit1[nc - 1])
            ustr = revuniq(ulist)
            if nu == 0:
                intresult.append(ustr)
            else:
                intresult.append(ustr + c_unit2[nu - 1])
        int_text = revuniq(intresult)
        # Neighbouring groups may each contribute a zero; it is only
        # written once.
        while zero * 2 in int_text:
            int_text = int_text.replace(zero * 2, zero)
        result.append(int_text.strip(zero))
    else:
        result.append(zero)

    if remainder:
        result.append(c_symbol[2])
        # Fractional digits are spelled out one by one.
        result.append(''.join(c_basic[int(ch)] for ch in remainder))
    return ''.join(result)
@fateleak

This comment has been minimized.

Copy link

fateleak commented Jul 13, 2016

thkx

@sudo-shubham

This comment has been minimized.

Copy link

sudo-shubham commented Jul 25, 2018

Works. Thanks.

@lkfo415579

This comment has been minimized.

Copy link

lkfo415579 commented Aug 13, 2018

works. thanks

@GuanJianChun

This comment has been minimized.

Copy link

GuanJianChun commented Aug 21, 2018

Thanks, it works. I also wrote a simple reverse function.

from unicodedata import numeric
def chinese2num(s):
    """Convert a string of Chinese numerals to a number.

    Each character is looked up with `unicodedata.numeric`. Values below
    10 are treated as digits, anything else as a multiplier unit
    (十, 百, 千, 万, ...).

    A trailing digit that directly follows a unit is read colloquially
    as a multiple of the next lower power of ten, e.g. 一万二 -> 12000.

    Returns 0 for an empty string; otherwise the value, which is a float
    because `numeric` returns floats.
    Raises ValueError (from `numeric`) for a character that has no
    numeric value.
    """
    if not s:
        return 0
    # A leading 十/拾 has an implied "one" in front of it (十二 == 一十二).
    # Without this, `digit` would be undefined at the first unit.
    if s[0] in '十拾':
        s = '一' + s
    amount = 0
    digit = 0  # pending digit that has not been scaled by a unit yet
    for ch in s:
        number = numeric(ch)
        if number < 10:
            digit = number
        else:
            # A unit larger than everything accumulated so far scales the
            # whole running total (十二万); a smaller one scales only the
            # pending digit (一万二千).
            if number > amount:
                amount = (amount + digit) * number
            else:
                amount = amount + digit * number
            digit = 0
    if len(s) > 1 and numeric(s[-2]) != 0:
        # Colloquial trailing digit: worth one tenth of the unit before it.
        return amount + digit * numeric(s[-2]) / 10
    return amount + digit
@Tristan-J

This comment has been minimized.

Copy link

Tristan-J commented Nov 6, 2018

Thanks, it works. I also wrote a simple reverse function.

from unicodedata import numeric
def chinese2num(s):
    """Convert a string of Chinese numerals to a number.

    Characters are evaluated with `unicodedata.numeric`: values below 10
    are digits, everything else is a multiplier unit. When the character
    before the last one has a non-zero value, the trailing digit is
    counted as one tenth of that value (一万二 -> 12000).
    """
    total = 0
    for char in s:
        value = numeric(char)
        if value < 10:
            # Plain digit: remember it until a unit (or the end) arrives.
            digit = value
            continue
        if value > total:
            # Unit bigger than the running total: scale everything so far.
            total = (total + digit) * value
        else:
            # Smaller unit: scale only the pending digit.
            total = total + digit * value
        digit = 0
    if len(s) > 1:
        before_last = numeric(s[-2])
        if before_last != 0:
            return total + digit * before_last / 10
    return total + digit

Hi, maybe you need to consider "十二". I'm also not sure we need this check: "len(s) > 1 and numeric(s[-2]) != 0", so my suggestion is:

def chinese2num(s):
    """Convert a string of Chinese numerals to an integer.

    Characters are evaluated with `unicodedata.numeric` (the name
    `numeric` must be imported from `unicodedata`): values below 10 are
    digits, everything else is a multiplier unit. A leading "十" is read
    as "一十", so "十二" gives 12.

    Returns 0 for an empty string.
    """
    if not s:
        return 0
    if s[0] == "十":
        # "十二" has an implied leading "一".
        s = "一" + s
    amount = 0
    digit = 0  # pending digit not yet scaled by a unit
    for ch in s:
        number = numeric(ch)
        if number < 10:
            digit = number
        else:
            # A unit larger than the running total scales all of it;
            # a smaller unit scales only the pending digit.
            if number > amount:
                amount = (amount + digit) * number
            else:
                amount = amount + digit * number
            digit = 0
    return int(amount + digit)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment