Skip to content

Instantly share code, notes, and snippets.

@shirou
Created January 16, 2020 01:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shirou/0d3ac8b94841510d3f4e0cf037a83136 to your computer and use it in GitHub Desktop.
Save shirou/0d3ac8b94841510d3f4e0cf037a83136 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import sys
import os
import unicodedata
from tempfile import NamedTemporaryFile
def is_nfd(line):
    """Return True if *line* contains at least one combining character.

    Surrounding whitespace is stripped first, then each remaining character
    is looked up with ``unicodedata.combining``; any non-zero canonical
    combining class makes the result True. This is a heuristic for
    "contains decomposed sequences": it reports the presence of combining
    marks, not whether the text is strictly in NFD.
    """
    stripped = line.strip()
    return any(unicodedata.combining(ch) for ch in stripped)
# Script entry point: rewrite the file named by the first command-line
# argument in place, converting every line that contains combining
# characters to Unicode NFC. Lines without combining characters are copied
# through unchanged.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} FILE")
filename = sys.argv[1]

# The temporary file is created in the target's own directory so the final
# os.replace() stays on one filesystem; renaming out of the system temp
# directory fails with a cross-device link error when it is a separate mount.
target_dir = os.path.dirname(os.path.abspath(filename))
temp_file_name = None
try:
    # newline="" on both sides disables newline translation, so the file's
    # original line endings (LF or CRLF) are written back unchanged.
    with NamedTemporaryFile(
        mode="w", encoding="utf-8", newline="", dir=target_dir, delete=False
    ) as tf:
        temp_file_name = tf.name
        with open(filename, encoding="utf-8", newline="") as f:
            for line in f:
                if is_nfd(line):
                    line = unicodedata.normalize("NFC", line)
                tf.write(line)
    # NamedTemporaryFile creates the file with owner-only permissions; carry
    # over the original file's permission bits before swapping it in.
    os.chmod(temp_file_name, os.stat(filename).st_mode & 0o7777)
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename raises on Windows when the destination exists.
    os.replace(temp_file_name, filename)
except BaseException:
    # Do not leave a stray temporary file behind when anything fails.
    if temp_file_name is not None and os.path.exists(temp_file_name):
        os.unlink(temp_file_name)
    raise
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment