eksortso/codepoint-equiv.py

## codepoint-equiv.py
#!/usr/bin/env python
"""Demonstrate Unicode canonical equivalence (needs Python 3.8+).

The name "Françoise" is built twice: once with the precomposed letter
U+00E7 and once with a plain "c" followed by the combining cedilla U+0327.
The script prints both spellings, their lengths, and how they compare
before and after NFC/NFD normalization.  The f-string "=" specifier used
throughout is what requires Python 3.8 or newer.
"""

from unicodedata import normalize

# Same name, two code point sequences.
# (Another pair to try: "pr\u00e9nom" and "pr\u0065\u0301nom", i.e. prénom.)
n1 = "Fran\u00e7oise"        # precomposed c-cedilla
n2 = "Fran\u0063\u0327oise"  # "c" + combining cedilla

# Echo the assignments as source text; the raw strings keep the \u escapes
# visible instead of rendering them.
print(
    "# Code",
    r'n1 = "Fran\u00e7oise"',
    r'n2 = "Fran\u0063\u0327oise"',
    "",
    sep="\n",
)

# With a font that supports combining marks, both lines look the same.
print("# Result", f'{n1 = }', f'{n2 = }', "", sep="\n")

# To compare the strings yourself, they can be written to a file:
#
#     with open("codepoint-equiv.txt", "w", encoding="utf8") as file:
#         file.writelines([n1 + "\n", n2 + "\n"])

# The decomposed spelling is one code point longer.
print("# Lengths", f'{len(n1) = }', f'{len(n2) = }', "", sep="\n")

# The code point sequences differ, so plain == reports the strings as
# unequal.  Normalizing one side to the other's form shows how they relate:
# n1 is the NFC form of n2, and n2 is the NFD form of n1.
print(
    "# Equivalences (or lack thereof)",
    f'{n1 == n2 = }',                    # False
    f'{n1 == normalize("NFC", n2) = }',  # True
    f'{n2 == normalize("NFD", n1) = }',  # True
    "",
    sep="\n",
)

# Normalizing both sides to the same form -- composed (NFC) or decomposed
# (NFD) -- makes the comparison succeed.
print(
    "# Comparisons",
    f'{normalize("NFC", n1) == normalize("NFC", n2) = }',  # True
    f'{normalize("NFD", n1) == normalize("NFD", n2) = }',  # True
    "",
    sep="\n",
)
	#!/usr/bin/env python
	"""Illustration of Unicode equivalence in Python 3.8+"""

	from unicodedata import normalize

	# Both variables represent "Françoise" but use different forms: n1 spells
	# the cedilla letter as the single code point U+00E7, n2 as "c" (U+0063)
	# followed by U+0327.
	# This first section echoes the assignments as source text; the raw
	# strings keep the \u escapes visible.
	print("# Code")
	# Alternative example pair (disabled):
	# print(r'n1 = "pr\u00e9nom"') # prénom
	# print(r'n2 = "pr\u0065\u0301nom"') # prénom
	print(r'n1 = "Fran\u00e7oise"') # Françoise
	print(r'n2 = "Fran\u0063\u0327oise"') # Françoise
	print()

	n1 = "Fran\u00e7oise" # Françoise
	n2 = "Fran\u0063\u0327oise" # Françoise

	# If your font supports it, these will appear identical.
	# The f-string "=" specifier prints the expression text followed by the
	# repr of its value.
	print("# Result")
	print(f'{n1 = }')
	print(f'{n2 = }')
	print()

	# # Here's how to save the strings to a file for your own comparisons:
	# with open("codepoint-equiv.txt", "w", encoding="utf8") as file:
	#     file.writelines([n1 + "\n", n2 + "\n"])

	# Lengths will be different though: n2 holds one more code point than n1.
	print("# Lengths")
	print(f'{len(n1) = }')
	print(f'{len(n2) = }')
	print()

	# Because their code points are different, Python 3.11.0 considers these two
	# strings to be different.
	print("# Equivalences (or lack thereof)")
	print(f'{n1 == n2 = }') # False

	# Here is how n1 and n2 are related: n1 is the composed (NFC) form of n2,
	# and n2 is the decomposed (NFD) form of n1.
	print(f'{n1 == normalize("NFC", n2) = }') # True
	print(f'{n2 == normalize("NFD", n1) = }') # True
	print()

	# When normalized to their _composed_ forms, they're the same.
	print("# Comparisons")
	print(f'{normalize("NFC", n1) == normalize("NFC", n2) = }') # True

	# And when normalized to their _decomposed_ forms, they're the same.
	print(f'{normalize("NFD", n1) == normalize("NFD", n2) = }') # True
	print()