Skip to content

Instantly share code, notes, and snippets.

@xthezealot
Last active August 3, 2023 14:07
Show Gist options
  • Save xthezealot/9a65fac2c7b916c4d84e66188bf06bec to your computer and use it in GitHub Desktop.
Save xthezealot/9a65fac2c7b916c4d84e66188bf06bec to your computer and use it in GitHub Desktop.
Normalize unicode file names (converts UTF-8 NFD to NFC). Required by macOS clients through AFP/NFS/SMB. Tested on Synology DSM 6.2 with built-in Python 2.7.12.

NFCFN.py

Normalize unicode file names (converts UTF-8 NFD to NFC).
Required by macOS clients through AFP/NFS/SMB.

Tested on Synology DSM 6.2 with built-in Python 2.7.12.

Usage

# 1. Activate SSH on your NAS

# 2. On your computer, open a new console/terminal and connect to your server:
ssh YourUserName@192.168.0.xxx

# 3. Go to the directory where you want to save the `nfcfn.py` script:
cd /volume1/YourSharedFolder/PathToScript

# 4. Download the latest version:
wget https://gist.githubusercontent.com/xthezealot/9a65fac2c7b916c4d84e66188bf06bec/raw/nfcfn.py

# 5. Run it with Python to check the result:
python nfcfn.py -cr /volume1/YourSharedFolder

# 6. When you are sure, add the `-p` flag to effectively rename the files:
python nfcfn.py -crp /volume1/YourSharedFolder
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Normalize unicode file names."""
from __future__ import unicode_literals
from argparse import ArgumentParser
from os import rename, walk
from os.path import exists, isfile, join, split
from sys import version_info
from unicodedata import normalize
def _utf8_size(name):
    """Return the number of bytes `name` occupies once encoded as UTF-8."""
    if isinstance(name, bytes):
        # Already encoded (e.g. a Python 2 byte string): its length is the size.
        return len(name)
    try:
        return len(name.encode("utf-8"))
    except UnicodeEncodeError:
        # Python 3 represents undecodable file-name bytes as lone surrogates;
        # "surrogateescape" turns each of them back into its original byte.
        return len(name.encode("utf-8", "surrogateescape"))


def bytes_saved(old, new):
    """Return a colorized tag with the UTF-8 size difference between two names.

    The difference is measured on the UTF-8 encoded names, not on the number
    of code points: normalization can reduce the byte size while increasing
    the character count (e.g. the "fi" ligature U+FB01 is 3 bytes, "fi" is 2).

    Args:
        old: the original name.
        new: the normalized name.

    Returns:
        A string of the form ``[<diff> byte(s)]`` wrapped in ANSI color
        escapes: green with a negative number when `new` is smaller, red with
        a ``+`` prefix when it is larger, and a blue ``=`` when both names
        have the same size. "bytes" is pluralized when the absolute
        difference exceeds 1.
    """
    diff = _utf8_size(new) - _utf8_size(old)
    if diff < 0:
        colored = "32m" + str(diff)
    elif diff > 0:
        colored = "31m+" + str(diff)
    else:
        colored = "34m="
    unit = " bytes" if abs(diff) > 1 else " byte"
    return "[\033[" + colored + unit + "\033[0m]"
def norm(root, file, form, proceed):
    """Normalize the name of one directory entry and optionally rename it.

    Args:
        root: path of the directory that contains the entry.
        file: current name of the entry inside `root`.
        form: normalization form handed to `unicodedata.normalize`.
        proceed: when true the entry is renamed on disk; otherwise the
            function only prints what it would do.

    Nothing happens when the name is already normalized. When the target
    name already exists in `root`, a message is printed and the entry is
    left untouched.
    """
    # "/", "\" and ":" are swapped for their fullwidth look-alikes after
    # normalization (compatibility forms fold the fullwidth variants down to
    # ASCII). The targets are written as escapes so they survive any tool
    # that normalizes this source text.
    # NOTE(review): the fullwidth targets were reconstructed from a garbled
    # copy of this line -- confirm against the upstream script.
    normed = (
        normalize(form, file)
        .replace("/", "\uff0f")
        .replace("\\", "\uff3c")
        .replace(":", "\uff1a")
    )
    if file == normed:
        return

    old = join(root, file)
    new = join(root, normed)
    if exists(new):
        # Never overwrite an existing entry.
        print(
            "%s \033[31mcannot be renamed as\033[0m %s "
            "\033[31malready exists\033[0m" % (old, normed)
        )
        return

    print("%s ▶︎ %s %s" % (old, normed, bytes_saved(file, normed)))
    if proceed:
        rename(old, new)
def main():
    """Parse the command line and normalize the names found under `source`.

    Command-line arguments:
        source: a file or a directory.
        -c / --compatibility: use NFKC instead of NFC.
        -p / --proceed: actually rename; without it the run only reports.
        -r / --recursive: also process the content of sub-directories.

    When `source` is a file, only that file is handled. Otherwise the
    entries of the directory are handled, directories before files.
    """
    parser = ArgumentParser(description="Normalize unicode file names.")
    parser.add_argument("source", help="the source file or directory")
    parser.add_argument(
        "-c",
        "--compatibility",
        action="store_true",
        # The ligature is written as an escape so the example survives any
        # tool that normalizes this source text.
        help='normalize with compatibility (ex: "\ufb01" becomes "fi")',
    )
    parser.add_argument("-p", "--proceed", action="store_true", help="rename files")
    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="go through directories recursively",
    )
    args = parser.parse_args()

    if version_info < (3,):
        # Python 2 only: turn the byte-string argument into unicode text.
        args.source = unicode(args.source, "utf8")  # noqa: F821

    norm_form = "NFKC" if args.compatibility else "NFC"

    # Source is a file
    if isfile(args.source):
        head, tail = split(args.source)
        norm(head, tail, norm_form, args.proceed)
        return

    # Source is a directory.
    # A recursive run walks bottom-up: the content of a directory is handled
    # before the directory itself is renamed. Walking top-down would rename a
    # directory first, after which walk() cannot list it under its old name
    # and silently skips everything below it.
    # A non-recursive run walks top-down so that the first (and only) level
    # visited is `source` itself.
    for root, dirs, files in walk(args.source, topdown=not args.recursive):
        for name in dirs + files:
            norm(root, name, norm_form, args.proceed)
        if not args.recursive:
            break


if __name__ == "__main__":
    main()
@janusn
Copy link

janusn commented Jul 16, 2021

Thank you very much for your explanation. I understand now. 👍🏼

I think I'd better omit the -c option in my use case, though. It confuses a few tools I use.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment