Created
August 31, 2023 10:55
-
-
Save shihono/042d05f95ca4ac11f9eb12a7492e01f4 to your computer and use it in GitHub Desktop.
python difflib.ndiff を全角に対応
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import difflib | |
import unicodedata | |
def get_char_width_list(text):
    """Return the display width of every character in ``text``.

    A character counts as 2 columns when its Unicode East Asian Width
    property is "F" (Fullwidth) or "W" (Wide); every other character
    (including the "A" ambiguous class) counts as 1 column.

    Args:
        text: The string to measure.

    Returns:
        A list of ints (each 1 or 2), one per character of ``text``,
        in the same order. An empty string gives an empty list.
    """
    return [
        2 if unicodedata.east_asian_width(char) in ("F", "W") else 1
        for char in text
    ]
def ndiff_fulwidth(a, b):
    """Yield ``difflib.ndiff(a, b)`` lines with full-width-aware "?" guides.

    ``difflib.ndiff`` emits "?" guide lines whose markers are positioned
    one column per character. When the line they annotate contains
    full-width characters, the markers no longer line up in a monospace
    display. This generator widens each guide position to match the width
    of the character above it.

    Args:
        a: Sequence of strings (lines) for the "before" side.
        b: Sequence of strings (lines) for the "after" side.

    Yields:
        The lines produced by ``difflib.ndiff(a, b)``. Lines that do not
        start with "?" are passed through unchanged. For "?" lines, each
        position under a width-2 character becomes either a full-width
        space (U+3000) or the marker repeated twice. The trailing newline
        of a "?" line is always preserved.
    """
    before_line = ""
    for line in difflib.ndiff(a, b):
        if not line.startswith("?"):
            yield line
            # Remember the "-"/"+" line so a following "?" guide can be
            # aligned against it.
            before_line = line
            continue

        # Split off the guide's trailing newline before zipping. The guide
        # can be one character longer than the annotated text (marker on
        # the last character + "\n"), and zip() would otherwise drop the
        # newline in exactly that case.
        has_newline = line.endswith("\n")
        guide = line[2:-1] if has_newline else line[2:]
        widths = get_char_width_list(before_line[2:])

        conv_line = [line[:2]]
        for width, mark in zip(widths, guide):
            if mark == " " and width == 2:
                # Half-width space under a full-width character:
                # use one full-width space instead.
                conv_line.append("\u3000")
            elif mark.isspace():
                # Any other whitespace (a tab or U+3000 carried over from
                # the annotated line, or a space under a width-1 character)
                # already has the right width; doubling it would shift the
                # markers that follow.
                conv_line.append(mark)
            else:
                # Marker character: repeat it to span the character's width.
                conv_line.append(mark * width)
        if has_newline:
            conv_line.append("\n")
        yield "".join(conv_line)
""" | |
:example: | |
print( | |
"\n".join(ndiff_fulwidth( | |
["アップル", "バナナ", "チョコト", "ドーナツ", "English", "フォトグラフィ"], | |
["アップル", "ナナ", "チョコレート", "ドーナッツ", "EngIish", "フォトダラフィ"] | |
)) | |
) | |
>>> | |
アップル | |
- バナナ | |
? -- | |
+ ナナ | |
- チョコト | |
+ チョコレート | |
? ++++ | |
- ドーナツ | |
+ ドーナッツ | |
? ++ | |
- English | |
? ^ | |
+ EngIish | |
? ^ | |
- フォトグラフィ | |
? ^^ | |
+ フォトダラフィ | |
? ^^ | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment