michael-lazar/zero-byte

## zero-byte
# Unicode code point zero (a.k.a. the ASCII NUL control character)
>>> u"\u0000"

# Is only one code-point long
>>> len(u"\u0000")
1

# Encodes to 0x00 in hex (aka 0000 0000 in binary)
>>> u"\u0000".encode("utf-8")
b'\x00'

# Is one byte long
>>> io.BytesIO().write(b"\x00")
1

# We want to replace code point zero with the literal six-character escape text "\" + "u" + "0" + "0" + "0" + "0"
>>> u"\u0000".replace("\u0000", "\\u0000")
'\\u0000'

# This is 6 code-points long
>>> len(u"\\u0000")
6

# And encodes to a six byte sequence
>>> u"\\u0000".encode("utf-8")
b'\\u0000'

# Which looks like this in hex
>>> binascii.hexlify(b'\\u0000')
b'5c7530303030'

(5c = "\", 75 = "u", 30 = "0")

# Verifying the length when written out is 6 bytes (io.BytesIO is an in-memory stand-in for a file on disk)
>>> io.BytesIO().write(b'\\u0000')
6
	# Unicode code point zero (a.k.a. the ASCII NUL control character)
	>>> u"\u0000"

	# Is only one code-point long
	>>> len(u"\u0000")
	1

	# Encodes to 0x00 in hex (aka 0000 0000 in binary)
	>>> u"\u0000".encode("utf-8")
	b'\x00'

	# Is one byte long
	>>> io.BytesIO().write(b"\x00")
	1

	# We want to replace code point zero with the literal six-character escape text "\" + "u" + "0" + "0" + "0" + "0"
	>>> u"\u0000".replace("\u0000", "\\u0000")
	'\\u0000'

	# This is 6 code-points long
	>>> len(u"\\u0000")
	6

	# And encodes to a six byte sequence
	>>> u"\\u0000".encode("utf-8")
	b'\\u0000'

	# Which looks like this in hex
	>>> binascii.hexlify(b'\\u0000')
	b'5c7530303030'

	(5c = "\", 75 = "u", 30 = "0")

	# Verifying the length when written out is 6 bytes (io.BytesIO is an in-memory stand-in for a file on disk)
	>>> io.BytesIO().write(b'\\u0000')
	6