EdisonChendi/trunc.py

## trunc.py
# coding=utf-8

def trunc(s, limit, coding="UTF-8", postfix="..."):
    """Truncate *s* so that its encoded size does not exceed *limit* bytes.

    Works on both Python 2 and Python 3. The cut always falls on a
    character boundary, so a multi-byte character is never split.

    Args:
        s: Text to truncate. A byte string is first decoded with *coding*;
            anything else is used as-is and is expected to be text.
        limit: Maximum size, in bytes of the *coding* encoding, of the
            text that is kept. The postfix is not counted against it.
        coding: Codec used to decode byte input and to measure each
            character's encoded width.
        postfix: Marker appended only when something was actually cut off.

    Returns:
        The text unchanged when it fits within *limit*; otherwise the
        longest prefix that fits, followed by *postfix*.
    """
    text = s.decode(coding) if isinstance(s, bytes) else s
    used = 0
    for index, char in enumerate(text):
        width = len(char.encode(coding))
        if used + width > limit:
            # This character would overflow the budget: cut just before it.
            return text[:index] + postfix
        used += width
    # Every character fitted, so nothing is dropped and no postfix is added.
    return text

# Demo written for Python 2: runs trunc() on three kinds of input and prints
# each result. These statements execute at module level.

# Unicode text, limited to 50 bytes measured in the default UTF-8 coding.
a = u"你好世界，" * 100
trunc_a = trunc(a, 50)
print(trunc_a)
# Plain literal: a byte string on Python 2 (so trunc() decodes it first),
# but already text on Python 3.
b = "你好世界，" * 100
trunc_b = trunc(b, 50)
print(trunc_b)
# GB2312-encoded bytes; passing coding="gb2312" makes trunc() both decode the
# input and measure the 20-byte limit in that encoding.
c = u'你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
trunc_c = trunc(c, 20, coding="gb2312")
print(trunc_c)

# Python 3 variant of the same demo, kept commented out. The only real
# difference is that the byte-string case is built explicitly with bytes().
# a = "你好世界，" * 100
# trunc_a = trunc(a, 50)
# print(trunc_a)
# b = bytes("你好世界，" * 100, "UTF-8")
# trunc_b = trunc(b, 50)
# print(trunc_b)
# c = '你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
# trunc_c = trunc(c, 20, coding="gb2312")
# print(trunc_c)
	# coding=utf-8

	def trunc(s, limit, coding="UTF-8", postfix="..."):
		"""Truncate *s* so that its encoded size does not exceed *limit* bytes.

		Works on both Python 2 and Python 3. The cut always falls on a
		character boundary, so a multi-byte character is never split.

		Args:
			s: Text to truncate. A byte string is first decoded with
				*coding*; anything else is used as-is and is expected
				to be text.
			limit: Maximum size, in bytes of the *coding* encoding, of the
				text that is kept. The postfix is not counted against it.
			coding: Codec used to decode byte input and to measure each
				character's encoded width.
			postfix: Marker appended only when something was cut off.

		Returns:
			The text unchanged when it fits within *limit*; otherwise the
			longest prefix that fits, followed by *postfix*.
		"""
		text = s.decode(coding) if isinstance(s, bytes) else s
		used = 0
		for index, char in enumerate(text):
			width = len(char.encode(coding))
			if used + width > limit:
				# This character would overflow the budget: cut before it.
				return text[:index] + postfix
			used += width
		# Every character fitted, so nothing is dropped and no postfix added.
		return text

	# Demo written for Python 2: runs trunc() on three kinds of input and
	# prints each result.

	# Unicode text, limited to 50 bytes measured in the default UTF-8 coding.
	a = u"你好世界，" * 100
	trunc_a = trunc(a, 50)
	print(trunc_a)
	# Plain literal: a byte string on Python 2 (so trunc() decodes it first),
	# but already text on Python 3.
	b = "你好世界，" * 100
	trunc_b = trunc(b, 50)
	print(trunc_b)
	# GB2312-encoded bytes; passing coding="gb2312" makes trunc() both decode
	# the input and measure the 20-byte limit in that encoding.
	c = u'你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
	trunc_c = trunc(c, 20, coding="gb2312")
	print(trunc_c)

	# Python 3 variant of the same demo, kept commented out. The only real
	# difference is that the byte-string case is built explicitly with bytes().
	# a = "你好世界，" * 100
	# trunc_a = trunc(a, 50)
	# print(trunc_a)
	# b = bytes("你好世界，" * 100, "UTF-8")
	# trunc_b = trunc(b, 50)
	# print(trunc_b)
	# c = '你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
	# trunc_c = trunc(c, 20, coding="gb2312")
	# print(trunc_c)