Skip to content

Instantly share code, notes, and snippets.

@dbr
Created December 14, 2009 18:15
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dbr/256270 to your computer and use it in GitHub Desktop.
Save dbr/256270 to your computer and use it in GitHub Desktop.
Function to ensure a filename is valid
import os
import re
import platform
def makeValidFilename(value, normalize_unicode=False, windows_safe=False, custom_blacklist=None):
    """
    Takes a string and makes it into a valid filename.

    The extension (as split off by os.path.splitext) is kept apart from the
    rest of the name: character replacement, the reserved-name check and
    unicode normalisation are applied to the name part only.

    normalize_unicode replaces accented characters with ASCII equivalent, and
    removes characters that cannot be converted sensibly to ASCII.

    windows_safe forces Windows-safe filenames, regardless of current platform

    custom_blacklist specifies additional characters that will be replaced
    with an underscore. This will not touch the extension separator:

    >>> makeValidFilename("T.est.avi", custom_blacklist=".")
    'T_est.avi'

    Returns the sanitised name with its extension re-attached. The result is
    never longer than the length limit chosen for the target platform.
    """
    if windows_safe:
        # Allow user to make Windows-safe filenames, if they so choose
        sysname = "Windows"
    else:
        sysname = platform.system()
    posix_like = sysname in ("Darwin", "Linux")

    # Treat extension separately
    value, extension = os.path.splitext(value)

    # Remove null byte
    value = value.replace("\0", "")

    # If the filename starts with a . prepend it with an underscore, so it
    # doesn't become hidden
    if value.startswith("."):
        value = "_" + value

    # Blacklist of characters
    if sysname == "Darwin":
        # : is technically allowed, but Finder will treat it as / and will
        # generally cause weird behaviour, so treat it as invalid.
        blacklist = r"/:"
    elif sysname == "Linux":
        blacklist = r"/"
    else:
        # platform.system docs say it could also return "Windows" or "Java".
        # Failsafe and use Windows sanitisation for Java, as it could be any
        # operating system.
        blacklist = r"\/:*?\"<>|"

    # Append custom blacklisted characters
    if custom_blacklist is not None:
        blacklist += custom_blacklist

    # Replace every blacklisted character with an underscore. re.escape keeps
    # characters such as ], ^ and - literal inside the character class.
    value = re.sub("[%s]" % re.escape(blacklist), "_", value)

    # Remove leading and trailing whitespace
    value = value.strip()

    # There are a bunch of filenames that are not allowed on Windows.
    # As with character blacklist, treat non Darwin/Linux platforms as Windows
    if not posix_like:
        invalid_filenames = (
            "CON", "PRN", "AUX", "NUL",
            "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
            "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
        )
        # Windows matches device names case-insensitively, so "con" is just
        # as reserved as "CON".
        if value.upper() in invalid_filenames:
            value = "_" + value

    # Replace accented characters with ASCII equivalent
    if normalize_unicode:
        import unicodedata
        if isinstance(value, bytes):
            # Only reachable with Python 2 byte strings; normalize() needs text
            value = value.decode("utf-8", "ignore")
        # NFKD splits accented characters into base letter + combining mark;
        # encoding to ASCII with "ignore" drops the marks and anything else
        # non-ASCII. Decode again so the result is text and can be joined with
        # the extension below.
        value = unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode("ascii")

    # Truncate filenames to valid length
    # NOTE(review): Windows itself permits longer name components; the limit
    # of 32 is carried over unchanged - confirm whether it is intentional.
    max_len = 255 if posix_like else 32
    if len(value) + len(extension) > max_len:
        if len(extension) > len(value):
            # Truncate extension instead of filename, no extension should be
            # this long..
            extension = extension[:max(max_len - len(value), 0)]
        else:
            value = value[:max(max_len - len(extension), 0)]

    # The part that was not truncated above may on its own exceed max_len
    # (a negative slice bound used to leave the result too long); the final
    # slice enforces the limit and is a no-op otherwise.
    return (value + extension)[:max_len]
def test():
    """
    Checks makeValidFilename against a table of known inputs and outputs.

    Each entry is (positional args, keyword args, expected result). Entries
    are checked in order and the first mismatch raises AssertionError.

    The entries without windows_safe depend on the current platform: for
    example the length cases expect the 255-character limit, which
    makeValidFilename only applies when platform.system() reports Darwin or
    Linux.
    """
    cases = [
        (("test.avi",), {}, "test.avi"),
        (("Test File.avi",), {}, "Test File.avi"),
        (("Test",), {}, "Test"),
        (("Test/File.avi",), {}, "Test_File.avi"),
        (("Test/File",), {}, "Test_File"),
        (("Test/File.avi",), {"windows_safe": True}, "Test_File.avi"),
        (("\\/:*?<Evil>|\"",), {"windows_safe": True}, "______Evil___"),
        (("COM2.txt",), {"windows_safe": True}, "_COM2.txt"),
        (("COM2",), {"windows_safe": True}, "_COM2"),
        ((".",), {}, "_."),
        (("..",), {}, "_.."),
        (("...",), {}, "_..."),
        (("Test.avi",), {"custom_blacklist": "e"}, "T_st.avi"),
        (("a" * 300,), {}, "a" * 255),
        (("a" * 255 + ".avi",), {}, "a" * 251 + ".avi"),
        (("a" * 251 + "b" * 10 + ".avi",), {}, "a" * 251 + ".avi"),
        (("test." + "a" * 255,), {}, "test." + "a" * 250),
    ]
    for args, kwargs, expected in cases:
        actual = makeValidFilename(*args, **kwargs)
        assert actual == expected, "Error, %r not equal to %r" % (actual, expected)
# Run the self-checks when this file is executed directly as a script
if __name__ == "__main__":
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment