Skip to content

Instantly share code, notes, and snippets.

@etiennecollin
Last active January 9, 2024 06:31
Show Gist options
  • Save etiennecollin/69ae54b18fde317be0a9fed2a362c9d7 to your computer and use it in GitHub Desktop.
Save etiennecollin/69ae54b18fde317be0a9fed2a362c9d7 to your computer and use it in GitHub Desktop.
The File System Formatter script is a Python program that automates the process of formatting file and directory names within a file system. It applies a set of rules to ensure consistent naming conventions and improve organization. The script allows users to customize the formatting options and provides options for dry run and verbose output.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Etienne Collin
# Date: 2023/12/20
# Email: collin.etienne.contact@gmail.com
################################################################################
# The File System Formatter script is a Python program that automates the process of formatting file and
# directory names within a file system. It applies a set of rules to ensure consistent naming conventions and
# improve organization. The script allows users to customize the formatting options and provides options for
# dry run and verbose output.
################################################################################
# Key Features:
# - To prevent a directory from being formatted, add a file named ".donotformat" to the directory
# - Renames files and directories to meet specified formatting rules
# - Excludes certain files, extensions, and characters from the formatting process
# - Treats files with certain extensions as directories
# - Deletes specified files from the file system
# - Handles special characters, accents, and spaces in names
# - Supports both dry run mode (previewing changes) and actual formatting
# - Provides verbose output to display the changes made during formatting
################################################################################
# Usage:
# - Run the script with the desired command-line arguments specifying the paths to the directories or files
# to be formatted.
# - Follow the prompts to choose dry run mode, verbose output, and confirmation of the formatting operation.
# - The script will recursively format the contents of the specified directories, renaming files and directories
# according to the specified rules.
# - Optionally, specified files can be deleted from the file system.
# - The script provides a summary of the formatting process, including the number of files and directories
# renamed or deleted.
################################################################################
# Note: It is recommended to review the formatting rules and backup important files before running the script
# to avoid any unintended changes to the file system.
################################################################################
import os
import re
import sys
from unidecode import unidecode
# Entries with one of these exact names are skipped by formatDirectory():
# they are neither renamed nor descended into.
EXCLUDED_FILES = [
    "Cargo.lock",
    "Cargo.toml",
    "LICENSE",
    "CNAME",
    "README.md",
    "target",
    "debug",
    "src",
    "lib",
    "out",
    "bin",
]

# Entries whose extension (as returned by os.path.splitext) is listed here are
# skipped by formatDirectory().
EXCLUDED_EXTENSIONS = [
    ".class",
    ".java",
    ".iml",
    ".d",
    ".o",
    ".rmeta",
    ".qpf",
    ".qsf",
    ".qip",
    ".qdf",
    ".qws",
    ".qps",
    ".bsf",
    ".bdf",
    ".v",
    ".rpt",
]

# Files with these extensions follow the directory naming rules
# (DIR_SEPARATOR instead of FILE_SEPARATOR).
TREAT_EXTENSION_AS_DIR = [
    ".zip",
]

# Files with one of these exact names are deleted by formatDirectory().
DELETE_FILES = [".DS_Store", "indent.log"]

# Characters that renameElement() replaces with the separator. The string is
# placed inside a regular-expression character class, so "]", "\\" and "-"
# need care if they are ever added here.
BAD_CHARS = ',&=+@#^$%*!`"?|<>:'

# Word separators used in formatted file names and directory names.
FILE_SEPARATOR = "_"
DIR_SEPARATOR = "-"

# Run-time options, set by launcher() from the user's answers.
verbose = False  # print every change that is made
dryRun = False  # preview the changes without touching the file system

# Counters reported in the summary printed by launcher().
filesRenamed = 0
dirRenamed = 0
filesDeleted = 0
def isElementFormatted(element: str, isElementAFile: bool) -> bool:
    """Return True when ``element`` already follows the naming rules.

    Args:
        element: Base name (not a path) of the file or directory to check.
        isElementAFile: True when the element is a file on disk. Files whose
            extension is listed in TREAT_EXTENSION_AS_DIR are still checked
            against the directory rules.

    Returns:
        True if the name needs no renaming, False otherwise.
    """
    name, extension = os.path.splitext(element)

    # renameElement() only splits off an extension for real files; for a real
    # directory the whole name is rewritten, so the whole name must be checked.
    checkedName = name if isElementAFile else element

    generalConditions = (
        # Comparing with lower() also accepts names without any cased letter
        # (e.g. "2023-12-20"), which str.islower() would reject.
        element == element.lower()
        and element.isascii()
        and " " not in element
        # renameElement() strips apostrophes, so their presence means "not formatted"
        and "'" not in checkedName
        and not containsCharsNotSurroundedByDigits(checkedName, ".")
    )

    if isElementAFile and extension not in TREAT_EXTENSION_AS_DIR:
        # File rules: no directory separator, no doubled file separator, and
        # no bad character in the name part (the extension is not inspected).
        return (
            generalConditions
            and DIR_SEPARATOR not in element
            and not substringOfRepeatedCharsExists(element, FILE_SEPARATOR)
            and not any(char in name for char in BAD_CHARS)
        )

    # Directory rules; "." (the current directory) is always accepted as is.
    return element == "." or (
        generalConditions
        and FILE_SEPARATOR not in element
        and not substringOfRepeatedCharsExists(element, DIR_SEPARATOR)
        and not any(char in element for char in BAD_CHARS)
    )
def renameElement(elementPath: str, separator: str):
    """Rename the file or directory at ``elementPath`` so it follows the naming rules.

    The base name is lower-cased and transliterated with unidecode, apostrophes
    are removed, and whitespace, BAD_CHARS, underscores (except a leading one)
    and dots/dashes that are not surrounded by digits are replaced with
    ``separator``. Runs of the separator are collapsed. For files the extension
    is kept (minus any whitespace it contains).

    Args:
        elementPath: Path of the element to rename.
        separator: Replacement string (FILE_SEPARATOR or DIR_SEPARATOR).

    Returns:
        The new path when the element was renamed (or would be, in dry-run
        mode); ``elementPath`` unchanged when the name is already correct or
        the rename could not be performed.
    """
    # Get name of element
    element = os.path.basename(elementPath)

    # Rewrite in lowercase and remove accents
    normalizedElement = unidecode(element.lower())
    fileName = normalizedElement
    extension = ""

    # If file, split into name and extension and drop whitespace from the extension
    if os.path.isfile(elementPath):
        fileName, extension = os.path.splitext(normalizedElement)
        extension = re.sub(r"\s", "", extension)

    # Delete ' in file name
    fileName = fileName.replace("'", "")

    # A name made only of apostrophes (or an empty base name) leaves nothing to work with
    if not fileName:
        print(f"| Error renaming {elementPath} --> name would be empty")
        return elementPath

    # Replace _ with separator except for the first character
    fileName = fileName[0] + fileName[1:].replace("_", separator)

    # Replace whitespace and bad characters with separator; BAD_CHARS is escaped
    # so that any character in it is taken literally inside the character class
    fileName = re.sub(rf"[\s{re.escape(BAD_CHARS)}]", separator, fileName)

    # Replace all dots and dashes that are not surrounded by digits with a separator
    fileName = re.sub(r"(?<!\d)[-.]|[-.](?!\d)", separator, fileName)

    # Replace every run of two or more separators with a single separator
    fileName = re.sub(rf"(?:{re.escape(separator)}){{2,}}", separator, fileName)

    # Get full name of renamed element
    renamedElement = fileName + extension

    # Return elementPath if element is not renamed
    if element == renamedElement:
        return elementPath

    renamedElementPath = os.path.join(os.path.dirname(elementPath), renamedElement)

    # os.rename() silently replaces an existing file on POSIX, so refuse to
    # clobber another entry. A case-only rename on a case-insensitive file
    # system resolves to the same entry and is still allowed.
    if os.path.lexists(renamedElementPath):
        try:
            sameEntry = os.path.samefile(elementPath, renamedElementPath)
        except OSError:
            sameEntry = False
        if not sameEntry:
            print(f"| Error renaming {elementPath} --> {renamedElementPath} (target already exists)")
            return elementPath

    # Rename element
    if not dryRun:
        try:
            os.rename(elementPath, renamedElementPath)
        except OSError:
            print(f"| Error renaming {elementPath} --> {renamedElementPath}")
            return elementPath

    # Print changes
    if verbose or dryRun:
        print(f"| Renamed {elementPath} --> {renamedElement}")

    return renamedElementPath
def containsCharsNotSurroundedByDigits(string: str, chars: str) -> bool:
    """Tell whether any of ``chars`` occurs in ``string`` without a digit on both sides.

    Args:
        string: Text to inspect.
        chars: Characters to look for; each one is matched literally.

    Returns:
        True if at least one occurrence of a character from ``chars`` is not
        immediately preceded or not immediately followed by a digit.
        False when ``chars`` is empty.
    """
    if not chars:
        return False
    # Escape so that "^", "]", "\\" or "-" cannot change the meaning of the class
    charClass = f"[{re.escape(chars)}]"
    return bool(re.search(rf"(?<!\d){charClass}|{charClass}(?!\d)", string))
def substringOfRepeatedCharsExists(string: str, chars: str) -> bool:
    """Tell whether ``string`` contains the same character from ``chars`` twice in a row.

    Args:
        string: Text to inspect.
        chars: Characters to look for; each one is matched literally.

    Returns:
        True if one character of ``chars`` appears at least twice
        consecutively in ``string``. False when ``chars`` is empty.
    """
    if not chars:
        return False
    # Escape so that "^", "]", "\\" or "-" cannot break or alter the class
    return bool(re.search(rf"([{re.escape(chars)}])\1+", string))
def formatDirectory(path: str):
    """Recursively format the contents of the directory at ``path``.

    The directory is left untouched when it contains a ".donotformat" file.
    Otherwise, for every entry: files named in DELETE_FILES are deleted; hidden
    entries and those matched by EXCLUDED_FILES / EXCLUDED_EXTENSIONS are
    skipped; directories are renamed if needed and then processed recursively;
    files are renamed if needed. The module-level counters are updated.

    Args:
        path: Path of the directory whose contents are formatted.

    Raises:
        FileNotFoundError, NotADirectoryError: propagated from os.listdir().
    """
    global dirRenamed, filesRenamed, filesDeleted

    try:
        elements = os.listdir(path)
    except PermissionError:
        # One unreadable directory must not abort the rest of the run
        print(f"| Error reading {path}")
        return

    if ".donotformat" in elements:
        return

    for element in elements:
        # Get path of element
        elementPath = os.path.join(path, element)
        isFile = os.path.isfile(elementPath)

        # Delete files specified in DELETE_FILES
        if isFile and element in DELETE_FILES:
            if not dryRun:
                try:
                    os.remove(elementPath)
                except OSError:
                    # Report and move on, like renameElement() does for failed renames
                    print(f"| Error deleting {elementPath}")
                    continue
            filesDeleted += 1
            if verbose or dryRun:
                print(f"| Deleted {element}")
            continue

        # Ignore hidden files and excluded files/extensions
        if (
            element.startswith(".")
            or element in EXCLUDED_FILES
            or os.path.splitext(element)[1] in EXCLUDED_EXTENSIONS
        ):
            continue

        if os.path.isdir(elementPath):
            renamedElementPath = elementPath
            # Rename directory if it is not yet formatted
            if not isElementFormatted(element, False):
                renamedElementPath = renameElement(elementPath, DIR_SEPARATOR)
                if elementPath != renamedElementPath:
                    dirRenamed += 1
            # In a dry run nothing moved on disk, so descend through the original path
            formatDirectory(elementPath if dryRun else renamedElementPath)
        elif isFile and not isElementFormatted(element, True):
            # Files treated as directories use the directory separator
            if os.path.splitext(element)[1] in TREAT_EXTENSION_AS_DIR:
                separator = DIR_SEPARATOR
            else:
                separator = FILE_SEPARATOR
            renamedElementPath = renameElement(elementPath, separator)
            if elementPath != renamedElementPath:
                filesRenamed += 1
def initializeFormatter(path: str):
    """Format the file or directory given on the command line.

    The element itself is renamed if needed; for a directory, its contents are
    then formatted recursively with formatDirectory(). The module-level
    counters are updated.

    Args:
        path: Path of the file or directory to format. Trailing slashes are
            ignored.

    Raises:
        FileNotFoundError: if ``path`` is empty, consists only of slashes, or
            does not exist.
        NotADirectoryError: may propagate from formatDirectory().
    """
    global dirRenamed, filesRenamed

    # Remove every trailing slash so that basename() returns the last component
    path = path.rstrip("/")

    # Report missing targets instead of silently claiming success; an empty
    # result also covers "" and "/" (the root itself is never formatted)
    if not path or not os.path.lexists(path):
        raise FileNotFoundError(f"No such file or directory: {path!r}")

    element = os.path.basename(path)

    if os.path.isdir(path):
        renamedPath = path
        # Rename directory if it is not yet formatted
        if not isElementFormatted(element, False):
            renamedPath = renameElement(path, DIR_SEPARATOR)
            if path != renamedPath:
                dirRenamed += 1
        # In a dry run nothing moved on disk, so descend through the original path
        formatDirectory(path if dryRun else renamedPath)
    elif os.path.isfile(path) and not isElementFormatted(element, True):
        # Files treated as directories use the directory separator
        if os.path.splitext(element)[1] in TREAT_EXTENSION_AS_DIR:
            separator = DIR_SEPARATOR
        else:
            separator = FILE_SEPARATOR
        renamedPath = renameElement(path, separator)
        if path != renamedPath:
            filesRenamed += 1
def launcher():
    """Interactive entry point of the script.

    Asks whether to do a dry run, whether to print changes (only asked for a
    real run), and for a final confirmation. Then formats every path given in
    sys.argv[1:] and prints a summary of the counters.
    """
    global verbose, dryRun

    def askYesNo(question: str) -> bool:
        # Keep asking until the answer is "y" or "n" (case-insensitive)
        while True:
            answer = input(question).lower()
            if answer in ("y", "n"):
                return answer == "y"
            print("| Invalid input")

    ruler = "------------"
    print(ruler)

    dryRun = askYesNo("| Do you want to run a dry run? (y/n): ")

    # A dry run always prints its changes, so verbosity is only asked for real runs
    if not dryRun:
        verbose = askYesNo("| Do you want to print the changes? (y/n): ")

    if not askYesNo("| Are you sure you want to format your file system? (y/n): "):
        print("| Your file system has not been formatted")
        return

    print(ruler)

    for arg in sys.argv[1:]:
        target = str(arg)
        try:
            initializeFormatter(target)
        except (FileNotFoundError, NotADirectoryError) as error:
            if isinstance(error, FileNotFoundError):
                print("| File not found")
            else:
                print("| Directory not found")
            print(f'| Error encountered while formatting "{target}"')
        else:
            print(f'| Done formatting "{target}"')

    summary = (
        ruler,
        "| Your file system has properly been formatted",
        f"| Files renamed: {filesRenamed}",
        f"| Directories renamed: {dirRenamed}",
        f"| Files deleted: {filesDeleted}",
        ruler,
    )
    for line in summary:
        print(line)

    if dryRun:
        print("| Script ran in dry run mode")
        print(ruler)
# Start the interactive launcher only when the file is executed as a script,
# so that importing the module does not trigger prompts or file-system changes.
if __name__ == "__main__":
    launcher()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment