Skip to content

Instantly share code, notes, and snippets.

@komasaru
Created March 4, 2018 03:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save komasaru/b25cbdf754971f920dd2f5743e950c7d to your computer and use it in GitHub Desktop.
Save komasaru/b25cbdf754971f920dd2f5743e950c7d to your computer and use it in GitHub Desktop.
Python script to truncate string, including multi-byte characters.
#! /usr/local/bin/python3.6
"""
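Truncate a string at a given width, counting each full-width character as
2 bytes (an optional omission marker can be appended).
Note: strictly speaking, "full-width character" here means any character
that takes more than 1 byte when encoded.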
"""
ENC = "utf-8"


def _char_width(ch):
    """Return 1 if ``ch`` encodes to a single byte in ``ENC``, otherwise 2."""
    return 1 if len(ch.encode(ENC)) == 1 else 2


def _text_width(text):
    """Return the width of ``text``: the sum of its per-character widths."""
    return sum(_char_width(ch) for ch in text)


def trunc_str(src: str, trunc_at: int, om: str = "") -> str:
    """Truncate ``src`` so that its width fits within ``trunc_at``.

    Width is counted per character: a character that encodes to exactly one
    byte in ``ENC`` counts as 1, every other character counts as 2.

    Args:
        src: The string to truncate.
        trunc_at: Maximum allowed width of the result.
        om: Omission marker appended when ``src`` is actually truncated.
            Its own width is reserved out of ``trunc_at``.

    Returns:
        ``src`` unchanged when its width is at most ``trunc_at``; otherwise
        the longest prefix of ``src`` whose width fits in ``trunc_at`` minus
        the width of ``om``, followed by ``om``.  When ``om`` alone is wider
        than ``trunc_at`` the result is just ``om``.
    """
    if _text_width(src) <= trunc_at:
        return src

    # Width left for the text itself once the marker has been reserved.
    # This may be zero or negative; then no character of ``src`` is kept.
    budget = trunc_at - _text_width(om)

    # Single pass with a running width, so the prefix is never re-encoded
    # and a 2-wide character is never split across the limit.
    keep = 0
    used = 0
    for ch in src:
        used += _char_width(ch)
        if used > budget:
            break
        keep += 1
    return src[:keep] + om
def main():
    """Print sample truncations of an ASCII string and a mixed-width string.

    Each sample string is truncated at several widths, first without an
    omission marker and then with each marker listed for it.
    """
    # (sample text, widths to truncate at, omission markers to try)
    cases = (
        ("abcdefg", range(5, 9), ("", "...")),
        ("AあBいCうDえEお", range(12, 17), ("", "...", "(続く)")),
    )
    for position, (src, limits, markers) in enumerate(cases):
        if position:
            print()  # blank line between sample groups
        print('src =', src)
        for om in markers:
            for limit in limits:
                if om:
                    label = f'trunc_str(src, {limit}, "{om}") ='
                    print(label, trunc_str(src, limit, om))
                else:
                    label = f'trunc_str(src, {limit}) ='
                    print(label, trunc_str(src, limit))


# Run the demo only when executed as a script, so importing this module for
# trunc_str does not print anything.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment