Skip to content

Instantly share code, notes, and snippets.

@w-e-w
Last active November 3, 2023 19:03
Show Gist options
  • Save w-e-w/f6c606faac8c2cebee49bdeb7226826c to your computer and use it in GitHub Desktop.
Save w-e-w/f6c606faac8c2cebee49bdeb7226826c to your computer and use it in GitHub Desktop.
Python replace illegal filename characters
import re
character_translation_table = str.maketrans('"*/:<>?\\|\t\n\v\f\r', '"*/:<>?\│ ')
leading_space_pattern = re.compile(r'^\s+')
def replace_illegal_filename_characters(input_filename: str):
r"""
Replace illegal characters with full-width variant
Table
" -> uff02 full-width quotation mark "
* -> uff0a full-width asterisk *
/ -> uff0f full-width solidus /
: -> uff1a full-width colon :
< -> uff1c full-width less-than sign <
> -> uff1e full-width greater-than sign >
? -> uff1f full-width question mark ?
\ -> uff3c full-width reverse solidus \
| -> uffe8 half-width forms light vertical │
\t\n\v\f\r -> u0020 space
"""
return input_filename.translate(character_translation_table)
def replace_illegal_filename_characters_leading_underscores(input_filename: str):
r"""
Replace illegal characters with full-width variant
Replace leading spaces with underscores
Table
" -> uff02 full-width quotation mark "
* -> uff0a full-width asterisk *
/ -> uff0f full-width solidus /
: -> uff1a full-width colon :
< -> uff1c full-width less-than sign <
> -> uff1e full-width greater-than sign >
? -> uff1f full-width question mark ?
\ -> uff3c full-width reverse solidus \
| -> uffe8 half-width forms light vertical │
\t\n\v\f\r -> u0020 space
"""
output_filename = input_filename.translate(character_translation_table)
output_filename = re.sub(leading_space_pattern, lambda match: '_' * len(match.group(0)), output_filename)
return output_filename
def replace_illegal_filename_characters_prefix_underscore(input_filename: str):
r"""
Replace illegal characters with full-width variant
if leading space then add underscore prefix
Table
" -> uff02 full-width quotation mark "
* -> uff0a full-width asterisk *
/ -> uff0f full-width solidus /
: -> uff1a full-width colon :
< -> uff1c full-width less-than sign <
> -> uff1e full-width greater-than sign >
? -> uff1f full-width question mark ?
\ -> uff3c full-width reverse solidus \
| -> uffe8 half-width forms light vertical │
\t\n\v\f\r -> u0020 space
"""
output_filename = input_filename.translate(character_translation_table)
return '_' + output_filename if output_filename.startswith(' ') else output_filename
if __name__ == '__main__':
filename = '\t\n\v\f\ra*b/c:d<e>f?g\\h|i\t\n\v\f\r.txt'
print(f'''Original:
"{filename}"
Replaced:
"{replace_illegal_filename_characters(filename)}"
"{replace_illegal_filename_characters_leading_underscores(filename)}"
"{replace_illegal_filename_characters_prefix_underscore(filename)}"
''')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment