Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Simple script to recursively strip non-BMP Unicode characters from file and folder names
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Strip emoji and other non-BMP codepoints from paths to make them compatible
with mkisofs/genisoimage"""
# Prevent Python 2.x PyLint from complaining if run on this
from __future__ import (absolute_import, division, print_function,
with_statement, unicode_literals)
__author__ = "Stephan Sokolow (deitarion/SSokolow)"
__appname__ = "strip_emoji.py"
__version__ = "0.1"
__license__ = "MIT"
import logging, os, re
log = logging.getLogger(__name__)

# Matches every codepoint outside the Basic Multilingual Plane (emoji etc.).
NON_BMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")


def process_path(path):
    """Rename ``path`` so that its final component has no non-BMP codepoints.

    Only the last path component is rewritten.  Parent components are left
    untouched so the rename target always lives in the directory the entry
    is already in (rewriting the whole path would point the rename at a
    directory that does not exist whenever a parent contains such
    characters).

    The entry is left alone, with a warning, when stripping would produce an
    empty name or when the stripped name is already taken; this avoids
    silently overwriting an unrelated file.

    Args:
        path: Path of the file or directory to rename.

    Returns:
        The path the entry has after the call: the new path if it was
        renamed, otherwise ``path`` unchanged.

    Raises:
        OSError: If the underlying ``os.rename`` call fails.
    """
    parent, name = os.path.split(path)
    cleaned = NON_BMP_RE.sub('', name)
    if cleaned == name:
        return path

    if not cleaned:
        log.warning("Not renaming %r: name would be empty", path)
        return path

    replaced = os.path.join(parent, cleaned)
    # lexists() also catches dangling symlinks occupying the target name.
    if os.path.lexists(replaced):
        log.warning("Not renaming %r: %r already exists", path, replaced)
        return path

    log.info("Renaming %r -> %r", path, replaced)
    os.rename(path, replaced)
    return replaced
def process_arg(path):
    """Strip non-BMP codepoints from the names found at ``path``.

    If ``path`` is a directory, every sub-directory and file beneath it is
    handed to ``process_path``; the directory named by ``path`` itself is
    not renamed.  Any other existing path (for example a single file) is
    handed to ``process_path`` directly.  A path that does not exist is
    skipped with a warning.

    Args:
        path: File or directory to process.
    """
    if not os.path.isdir(path):
        # os.walk() yields nothing for a non-directory, so a file given
        # directly would otherwise be ignored without any feedback.
        if os.path.lexists(path):
            process_path(path)
        else:
            log.warning("Skipping %r: no such file or directory", path)
        return

    for parent, dirs, files in os.walk(path):
        dirs.sort()
        renamed = [process_path(os.path.join(parent, name)) for name in dirs]
        # Update the list in place so os.walk descends into the directories
        # under the names they have after renaming.
        dirs[:] = [os.path.basename(entry) for entry in renamed]
        for fname in files:
            process_path(os.path.join(parent, fname))
def main():
    """The main entry point, compatible with setuptools entry points.

    Parses the command line, configures logging from the -v/-q counts and
    then runs ``process_arg`` on each path given.
    """
    from argparse import ArgumentParser, RawDescriptionHelpFormatter

    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # an empty description instead of raising AttributeError.
    description = (__doc__ or '').replace('\r\n', '\n').split(
        '\n--snip--\n')[0]

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                            description=description)
    parser.add_argument('--version', action='version',
                        version="%%(prog)s v%s" % __version__)
    parser.add_argument('-v', '--verbose', action="count",
                        default=2, help="Increase the verbosity. Use twice for extra effect.")
    parser.add_argument('-q', '--quiet', action="count",
                        default=0, help="Decrease the verbosity. Use twice for extra effect.")
    parser.add_argument('path', action="store", nargs="+",
                        help="Path to operate on")
    # Reminder: %(default)s can be used in help strings.

    args = parser.parse_args()

    # Set up clean logging to stderr
    log_levels = [logging.CRITICAL, logging.ERROR, logging.WARNING,
                  logging.INFO, logging.DEBUG]
    # Clamp (verbose - quiet) to a valid index into log_levels.
    args.verbose = min(args.verbose - args.quiet, len(log_levels) - 1)
    args.verbose = max(args.verbose, 0)
    logging.basicConfig(level=log_levels[args.verbose],
                        format='%(levelname)s: %(message)s')

    for path in args.path:
        process_arg(path)
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__': # pragma: nocover
    main()
# vim: set sw=4 sts=4 expandtab :
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment