Skip to content

Instantly share code, notes, and snippets.

@gullevek
Last active December 3, 2022 06:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gullevek/4d2a5175c23a37d4b0e7fa2db26437a4 to your computer and use it in GitHub Desktop.
Save gullevek/4d2a5175c23a37d4b0e7fa2db26437a4 to your computer and use it in GitHub Desktop.
Python functions to shorten strings containing double-width (CJK) characters and to compute the adjusted format width so printed output stays aligned
#!/usr/bin/env python3
"""
formatting with double width characters
"""
import unicodedata
def shorten_string_cjk(intput_string, width, placeholder='..'):
    """
    Shorten a string to a maximum display width, counting wide characters double.

    Characters whose East Asian Width is "W" (wide) or "F" (fullwidth) occupy
    two columns, every other character one. If the string does not fit into
    ``width`` columns, leading characters are kept for as long as they fit
    into ``width`` minus the display width of ``placeholder``, and
    ``placeholder`` is appended.

    Args:
        intput_string (Any): value to shorten; converted with ``str()`` first
        width (int): maximum display width of the result, in columns
        placeholder (str, optional): marker appended to a truncated string.
            Its display width (not its character count) is reserved inside
            ``width``. Defaults to '..'.

    Returns:
        str: the unchanged string if it fits, otherwise the truncated string
        followed by ``placeholder``. When no character fits in front of the
        placeholder, only the placeholder is returned.
    """
    text = str(intput_string)
    # nothing to do if the string already fits
    if _display_width(text) <= width:
        return text
    # columns left for real characters once the placeholder is reserved;
    # measured in display columns so a wide placeholder is not under-counted
    budget = width - _display_width(placeholder)
    kept = []
    used = 0
    for char in text:
        used += _display_width(char)
        # the running width only grows, so nothing after this point can fit
        if used > budget:
            break
        kept.append(char)
    return "".join(kept) + placeholder


def _display_width(text):
    """Return the column count of ``text``: wide/fullwidth chars are 2, others 1."""
    return sum(
        2 if unicodedata.east_asian_width(char) in ("W", "F") else 1
        for char in text
    )
def string_length_cjk(input_string):
    """
    Display width of a string that may contain CJK (double width) characters

    Args:
        input_string (string): string to measure

    Returns:
        int: number of columns, where every character with East Asian Width
        "W" (wide) or "F" (fullwidth) counts as two and any other as one
    """
    columns = 0
    for char in input_string:
        # wide and fullwidth characters take up two columns
        if unicodedata.east_asian_width(char) in ("W", "F"):
            columns += 2
        else:
            columns += 1
    return columns
def format_string_length(input_string, length):
    """
    Return the format width that pads ``input_string`` to ``length`` columns.

    Format padding such as ``f"{text:<{n}}"`` counts characters, while a
    character with East Asian Width "W" (wide) or "F" (fullwidth) occupies
    two columns. The requested length is therefore reduced by one for each
    such character in the string.

    Args:
        input_string (string): string that is going to be formatted
        length (int): wanted output width in display columns

    Returns:
        int: width to use in the format specification. Clamped to 0, because
        a negative number is not a valid format width.
    """
    # each wide character needs one column more than its character count
    wide_chars = sum(
        unicodedata.east_asian_width(char) in ("W", "F")
        for char in input_string
    )
    return max(length - wide_chars, 0)
def main():
    """
    Demo run over a fixed set of sample strings.

    Prints three sections: every sample with its display width and its
    character count, the values calculated for the shortened sample, and
    the shortened sample padded between two bars.
    """
    samples = [
        "Some string 123 other text",
        "Some string 日本語 other text",
        "日本語は string 123 other text",
        "あいうえおかきくけこさしすせそなにぬねのまみむめも〜",
        "あいうえおかきくけこさしす 1 other text",
        "Some string すせそなにぬねのまみむめも〜",
        "SOME OTHER STRING THAT IS LONGER THAN TWENTYSIX CHARACTERS"
    ]
    # column width of the padded output and width to shorten to
    format_len = 26
    string_len = 26

    print("Original string")
    for sample in samples:
        cjk_len = string_length_cjk(sample)
        print(f"Normal (CJK len {cjk_len:>2}/len {len(sample):>2}): |{sample}|")

    print("Shorten string")
    for sample in samples:
        short = shorten_string_cjk(sample, width=string_len)
        print(
            f"Calculate> format_len: {format_len}, string_len: {string_len}, "
            f"stringLenCJK(short) {string_length_cjk(short)}, "
            f"len(short) {len(short)}, "
            f"new format_len: {format_string_length(short, format_len)}"
        )

    # the pad width is reduced by the number of double width characters in
    # the shortened string, shortening itself works on the display width
    for sample in samples:
        short = shorten_string_cjk(sample, width=string_len)
        pad_to = format_string_length(short, format_len)
        print(f"Normal: |{short:<{pad_to}}|")
# run the demo only when executed as a script, so that importing this
# module for its functions does not print the demo output
if __name__ == "__main__":
    main()
# __END__
@gullevek
Copy link
Author

gullevek commented Dec 3, 2022

Updated with Python 3.9/10
pylance linting pass

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment