Python script to truncate string, including multi-byte characters.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
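#! /usr/local/bin/python3.6
"""
Truncate a string at a given byte width, counting each full-width character
as 2 bytes (an omission marker to append can optionally be specified).
* Strictly speaking, "full-width character" here means any character that
  takes more than 1 byte.
"""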
ENC = "utf-8"  # encoding used to decide whether a character is multi-byte


def _char_width(ch: str) -> int:
    """Return 1 if *ch* encodes to a single byte in ENC, otherwise 2."""
    return 1 if len(ch.encode(ENC)) == 1 else 2


def _text_width(text: str) -> int:
    """Return the sum of the per-character widths of *text*."""
    return sum(_char_width(ch) for ch in text)


def trunc_str(src: str, trunc_at: int, om: str = "") -> str:
    """Truncate *src* so that its width does not exceed *trunc_at*.

    Width is counted per character: a character that encodes to one byte
    in ENC counts as 1, every other character counts as 2 (regardless of
    whether it needs 2, 3 or 4 bytes).

    Args:
        src: The string to truncate.
        trunc_at: Maximum allowed width of the result.
        om: Omission marker appended when truncation happens (for example
            "..."). Its own width is deducted from the space left for
            *src*.

    Returns:
        *src* unchanged when it already fits; otherwise the longest prefix
        of *src* whose width fits into ``trunc_at - width(om)``, followed
        by *om*. A double-width character is never split, so the result
        may be one unit narrower than *trunc_at*. If *om* alone is wider
        than *trunc_at*, the result is just *om*.
    """
    if _text_width(src) <= trunc_at:
        return src

    # Width left for characters of src once the marker is accounted for.
    # This may be zero or negative; the scan below then keeps no characters
    # instead of slicing with a negative index.
    budget = trunc_at - _text_width(om)

    used = 0
    cut = len(src)
    for i, ch in enumerate(src):
        used += _char_width(ch)
        if used > budget:
            # Character i is the first one that no longer fits.
            cut = i
            break
    return src[:cut] + om


# Demonstration 1: ASCII-only input, first without and then with a marker.
src = "abcdefg"
print('src =', src)
for limit in (5, 6, 7, 8):
    print(f'trunc_str(src, {limit}) =', trunc_str(src, limit))
for limit in (5, 6, 7, 8):
    print(f'trunc_str(src, {limit}, "...") =', trunc_str(src, limit, "..."))
print()

# Demonstration 2: alternating single-byte and multi-byte characters,
# without a marker, with an ASCII marker, and with a mixed-width marker.
src = "AあBいCうDえEお"
print('src =', src)
for limit in range(12, 17):
    print(f'trunc_str(src, {limit}) =', trunc_str(src, limit))
for marker in ("...", "(続く)"):
    for limit in range(12, 17):
        print(f'trunc_str(src, {limit}, "{marker}") =',
              trunc_str(src, limit, marker))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment