Skip to content

Instantly share code, notes, and snippets.

@aoirint
Created March 19, 2023 08:11
Show Gist options
  • Save aoirint/88213228a91ffc913d3a616401e62c9f to your computer and use it in GitHub Desktop.
Save aoirint/88213228a91ffc913d3a616401e62c9f to your computer and use it in GitHub Desktop.
def decode_utf8_bytestream(byte_stream):
    """Decode a buffer of NUL-terminated UTF-8 strings.

    The buffer is cut at every ``0x00`` byte and each piece is decoded as
    UTF-8. ``0x00`` never occurs inside a multi-byte UTF-8 sequence, so
    cutting on it cannot split a character.

    Args:
        byte_stream: A ``bytes``-like object (or any iterable of ints in
            ``range(256)``) holding zero or more strings, each followed by
            a NUL terminator. A final string without a terminator is
            still returned.

    Returns:
        A list of ``str``, one per string in the buffer, in order. Empty
        strings produced by consecutive terminators are kept. An empty
        buffer yields an empty list.

    Raises:
        UnicodeDecodeError: If any piece is not valid UTF-8.
    """
    data = bytes(byte_stream)
    segments = data.split(b'\x00')
    # A terminator after the last string leaves one empty trailing piece
    # that is not a real entry (this also maps empty input to []).
    if not segments[-1]:
        segments.pop()
    return [segment.decode('utf-8') for segment in segments]
# Demo input: two UTF-8 encoded strings, each followed by a 0x00 byte.
byte_str = b'\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\x00\xe3\x81\x8a\xe3\x81\x86\xe3\x81\x8c\xe3\x81\x84\x00'
decoded_strings = decode_utf8_bytestream(byte_str)
# Print the first two decoded entries; the author's notes give the intended
# output as 'あいう' followed by 'おうがい'.
for position in (0, 1):
    print(decoded_strings[position])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment