gullevek/double_byte_string_format.py

## double_byte_string_format.py
#!/usr/bin/env python3

"""
formatting with double width characters

"""

import unicodedata

def _clip_to_width(text, limit):
    """
    cut a string down to the longest prefix that fits into a display width

    Args:
        text (string): string to cut
        limit (int): maximum display width, wide and full width
            characters count as 2, all others as 1

    Returns:
        tuple: (prefix, display width of the prefix)
    """
    used = 0
    for index, char in enumerate(text):
        char_width = 2 if unicodedata.east_asian_width(char) in ("W", "F") else 1
        # stop in front of the first character that would overflow the limit
        if used + char_width > limit:
            return text[:index], used
        used += char_width
    return text, used


def shorten_string_cjk(intput_string, width, placeholder='..'):
    """
    shorten a string with CJK (double width) characters to a display width

    Args:
        intput_string (string): input string to shorten, anything that is
            not a string is converted with str()
        width (int): display width to shorten to, wide and full width
            characters count as 2
        placeholder (str, optional): appended to mark a shortened string,
            its display width is part of width. Defaults to '..'.

    Returns:
        string: the unchanged string if it fits into width, else the longest
            fitting start of the string followed by the placeholder,
            never wider than width
    """
    text = str(intput_string)
    fitting, _ = _clip_to_width(text, width)
    # nothing had to be cut off, the string fits as it is
    if len(fitting) == len(text):
        return text
    # measure the placeholder in display columns too (it can hold wide
    # characters) and trim it if it alone is already wider than width
    marker, marker_width = _clip_to_width(placeholder, width)
    kept, _ = _clip_to_width(text, width - marker_width)
    return f"{kept}{marker}"

def string_length_cjk(input_string):
    """
    display width of a string that may hold CJK (double width) characters

    Args:
        input_string (string): string to get the display width for

    Returns:
        int: sum of the character widths, where wide and full width
            (east asian width W or F) characters count as two and
            all other characters count as one
    """
    total_width = 0
    for char in input_string:
        if unicodedata.east_asian_width(char) in ("W", "F"):
            total_width += 2
        else:
            total_width += 1
    return total_width

def format_string_length(input_string, length):
    """
    adjust a format length for a string with double width characters

    the difference between the display width (double width characters
    count as two) and the plain character count is taken off the
    requested length

    Args:
        input_string (string): string the format length is used for
        length (int): wanted length for the formatted string

    Returns:
        int: length reduced by one for each double width character
            in the string
    """
    display_width = string_length_cjk(input_string)
    extra_columns = display_width - len(input_string)
    return length - extra_columns

def main():
    """
    main call:
    prints the sample strings with their CJK and normal length, then the
    calculated lengths for the shortened strings and last the shortened
    strings left aligned with the adjusted format length
    """
    samples = [
        "Some string 123 other text",
        "Some string 日本語 other text",
        "日本語は string 123 other text",
        "あいうえおかきくけこさしすせそなにぬねのまみむめも〜",
        "あいうえおかきくけこさしす 1 other text",
        "Some string すせそなにぬねのまみむめも〜",
        "SOME OTHER STRING THAT IS LONGER THAN TWENTYSIX CHARACTERS"
    ]

    format_len = 26
    string_len = 26
    print("Original string")
    for sample in samples:
        print(
            "Normal  (CJK len "
            f"{string_length_cjk(sample):>2}/len {len(sample):>2}): |{sample}|"
        )
    # shorten each sample only once, both loops below work on the same result
    shortened = [shorten_string_cjk(sample, width=string_len) for sample in samples]
    print("Shorten string")
    for short in shortened:
        print(
            f"Calculate> format_len: {format_len}, string_len: {string_len}, "
            "stringLenCJK(short) "
            f"{string_length_cjk(short)}, "
            f"len(short) {len(short)}, "
            "new format_len: "
            f"{format_string_length(short, format_len)}"
        )
    # shorten format length by the number of double width characters found
    # in the shortened string
    # string shorten uses double width character count
    for short in shortened:
        pad_length = format_string_length(short, format_len)
        print(
            "Normal: "
            f"|{short:<{pad_length}}|"
        )

# only print the sample output when run as a script, so the functions
# can be imported without side effects
if __name__ == "__main__":
    main()

# __END__
	#!/usr/bin/env python3

	"""
	formatting with double width characters

	"""

	import unicodedata

	def _clip_to_width(text, limit):
	    """
	    cut a string down to the longest prefix that fits into a display width

	    Args:
	        text (string): string to cut
	        limit (int): maximum display width, wide and full width
	            characters count as 2, all others as 1

	    Returns:
	        tuple: (prefix, display width of the prefix)
	    """
	    used = 0
	    for index, char in enumerate(text):
	        char_width = 2 if unicodedata.east_asian_width(char) in ("W", "F") else 1
	        # stop in front of the first character that would overflow the limit
	        if used + char_width > limit:
	            return text[:index], used
	        used += char_width
	    return text, used


	def shorten_string_cjk(intput_string, width, placeholder='..'):
	    """
	    shorten a string with CJK (double width) characters to a display width

	    Args:
	        intput_string (string): input string to shorten, anything that is
	            not a string is converted with str()
	        width (int): display width to shorten to, wide and full width
	            characters count as 2
	        placeholder (str, optional): appended to mark a shortened string,
	            its display width is part of width. Defaults to '..'.

	    Returns:
	        string: the unchanged string if it fits into width, else the longest
	            fitting start of the string followed by the placeholder,
	            never wider than width
	    """
	    text = str(intput_string)
	    fitting, _ = _clip_to_width(text, width)
	    # nothing had to be cut off, the string fits as it is
	    if len(fitting) == len(text):
	        return text
	    # measure the placeholder in display columns too (it can hold wide
	    # characters) and trim it if it alone is already wider than width
	    marker, marker_width = _clip_to_width(placeholder, width)
	    kept, _ = _clip_to_width(text, width - marker_width)
	    return f"{kept}{marker}"

	def string_length_cjk(input_string):
	    """
	    display width of a string that may hold CJK (double width) characters

	    Args:
	        input_string (string): string to get the display width for

	    Returns:
	        int: sum of the character widths, where wide and full width
	            (east asian width W or F) characters count as two and
	            all other characters count as one
	    """
	    # return string len including double count for double width characters
	    return sum(
	        2 if unicodedata.east_asian_width(char) in ("W", "F") else 1
	        for char in input_string
	    )

	def format_string_length(input_string, length):
	    """
	    adjust a format length for a string with double width characters

	    the difference between the display width (double width characters
	    count as two) and the plain character count is taken off the
	    requested length

	    Args:
	        input_string (string): string the format length is used for
	        length (int): wanted length for the formatted string

	    Returns:
	        int: length reduced by one for each double width character
	            in the string
	    """
	    return length - (string_length_cjk(input_string) - len(input_string))

	def main():
	    """
	    main call:
	    prints the sample strings with their CJK and normal length, then the
	    calculated lengths for the shortened strings and last the shortened
	    strings left aligned with the adjusted format length
	    """
	    samples = [
	        "Some string 123 other text",
	        "Some string 日本語 other text",
	        "日本語は string 123 other text",
	        "あいうえおかきくけこさしすせそなにぬねのまみむめも〜",
	        "あいうえおかきくけこさしす 1 other text",
	        "Some string すせそなにぬねのまみむめも〜",
	        "SOME OTHER STRING THAT IS LONGER THAN TWENTYSIX CHARACTERS"
	    ]

	    format_len = 26
	    string_len = 26
	    print("Original string")
	    for sample in samples:
	        # plain "|" delimiters: "\|" is an invalid escape sequence and
	        # would print a stray backslash
	        print(
	            "Normal (CJK len "
	            f"{string_length_cjk(sample):>2}/len {len(sample):>2}): |{sample}|"
	        )
	    # shorten each sample only once, both loops below work on the same result
	    shortened = [shorten_string_cjk(sample, width=string_len) for sample in samples]
	    print("Shorten string")
	    for short in shortened:
	        print(
	            f"Calculate> format_len: {format_len}, string_len: {string_len}, "
	            "stringLenCJK(short) "
	            f"{string_length_cjk(short)}, "
	            f"len(short) {len(short)}, "
	            "new format_len: "
	            f"{format_string_length(short, format_len)}"
	        )
	    # shorten format length by the number of double width characters found
	    # in the shortened string
	    # string shorten uses double width character count
	    for short in shortened:
	        pad_length = format_string_length(short, format_len)
	        print(
	            "Normal: "
	            f"|{short:<{pad_length}}|"
	        )

	# only print the sample output when run as a script, so the functions
	# can be imported without side effects
	if __name__ == "__main__":
	    main()

	# __END__