Skip to content

Instantly share code, notes, and snippets.

@sc0tt
Last active March 31, 2023 02:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sc0tt/dd0c3bfb9f70d907291e0780fcee3d5f to your computer and use it in GitHub Desktop.
Save sc0tt/dd0c3bfb9f70d907291e0780fcee3d5f to your computer and use it in GitHub Desktop.
I needed to sort around 5k files into different directories based on extension. I used this to try out ChatGPT.
import shutil
import json
from pathlib import Path
# Where the unsorted files live, and where the sorted copies are written.
source_dir = Path('root')
destination_dir = Path('root_sorted')

# Category name -> lower-case file extensions (without the leading dot)
# that belong to that category. Each category becomes a sub-directory of
# destination_dir.
category_mappings = {
    'audio': ['mp3', 'ogg', 'm4a', 'wav'],
    'video': ['mp4', 'webm', 'gifv', 'mov', 'mpg'],
    'image': [
        'svg', 'png', 'gif', 'jpg',
        'jpeg', 'webp', 'heic', 'ico',
    ],
    'document': [
        'zip', 'csv', 'epub', 'mobi', '7z', 'gz',
        'txt', 'json', 'pdf', 'log', 'xlsx', 'docx',
    ],
    'application': ['apk', 'iso', 'exe'],
    'database': ['db', 'sqlite'],
}
def copy_files(debug=False, *, source=None, destination=None, mappings=None):
    """Copy files from a flat source directory into per-category folders.

    Each regular file directly inside the source directory is categorised by
    its lower-cased extension. Files whose extension belongs to a category
    are copied (with metadata, via ``shutil.copy2``) into
    ``<destination>/<category>/``. Files with an unrecognised extension are
    *not* copied; they are only tallied in the report.

    If a file with the same name already exists at the destination it is
    never overwritten: when its size or modification time differs from the
    source it is recorded as a conflict, otherwise it is silently skipped.

    Args:
        debug: When true, perform a dry run: no file is copied, but the
            files that would have been copied are still counted. The
            destination directories are created either way.
        source: Directory to read from. Defaults to the module-level
            ``source_dir``.
        destination: Directory to write to. Defaults to the module-level
            ``destination_dir``.
        mappings: Dict of category name -> list of extensions (lower case,
            no leading dot). Defaults to the module-level
            ``category_mappings``.

    Returns:
        A JSON-serialisable dict with the keys ``file_counts``,
        ``total_files_copied``, ``total_files_not_copied``,
        ``total_conflicts``, ``total_invalid_extensions``,
        ``invalid_extensions``, ``conflicts`` and ``errors``.
    """
    source = source_dir if source is None else Path(source)
    destination = destination_dir if destination is None else Path(destination)
    mappings = category_mappings if mappings is None else mappings

    # Reverse lookup built once instead of scanning every category per file.
    # setdefault keeps the first category that lists an extension, which
    # matches a first-match scan over the mapping.
    extension_to_category = {}
    for category_name, extensions in mappings.items():
        for known_extension in extensions:
            extension_to_category.setdefault(known_extension, category_name)

    # Bookkeeping for the report.
    file_counts = {}
    invalid_extensions = {}
    copied_files = []
    not_copied_files = []
    conflicts = []
    errors = []

    # Create the destination directories. 'unknown' is created as well even
    # though nothing is copied into it by this function.
    for category in mappings:
        (destination / category).mkdir(parents=True, exist_ok=True)
    (destination / 'unknown').mkdir(parents=True, exist_ok=True)

    # List the directory once. Sub-directories are skipped: they are not
    # files to sort and shutil.copy2 cannot copy them.
    files = [path for path in source.glob('*') if path.is_file()]
    total_files = len(files)

    for file_count, file_path in enumerate(files, start=1):
        extension = file_path.suffix.lower()[1:]
        category = extension_to_category.get(extension)

        if category is None:
            # Unrecognised extension: tally it and leave the file in place.
            invalid_extensions[extension] = invalid_extensions.get(extension, 0) + 1
            not_copied_files.append(str(file_path))
        else:
            destination_path = destination / category / file_path.name
            if destination_path.exists():
                # Never overwrite. A differing size or mtime means the two
                # files are probably not the same content: flag a conflict.
                source_stat = file_path.stat()
                existing_stat = destination_path.stat()
                if (source_stat.st_size != existing_stat.st_size
                        or source_stat.st_mtime != existing_stat.st_mtime):
                    conflicts.append(file_path.name)
            else:
                copied = True
                if not debug:
                    try:
                        shutil.copy2(file_path, destination_path)
                    except OSError:
                        # shutil.Error is an OSError subclass, so this covers
                        # both OS-level and shutil-level copy failures.
                        errors.append(file_path.name)
                        copied = False
                # Only count files that were (or, in a dry run, would be)
                # copied; a failed copy is reported under 'errors' only.
                if copied:
                    file_counts[category] = file_counts.get(category, 0) + 1
                    copied_files.append(str(destination_path))

        print(f'Progress: {file_count}/{total_files} files processed.', end='\r')

    if files:
        # Terminate the '\r'-rewritten progress line so later output starts
        # on a fresh line.
        print()

    return {
        'file_counts': file_counts,
        'total_files_copied': len(copied_files),
        'total_files_not_copied': len(not_copied_files),
        'total_conflicts': len(conflicts),
        'total_invalid_extensions': sum(invalid_extensions.values()),
        'invalid_extensions': invalid_extensions,
        'conflicts': conflicts,
        'errors': errors,
    }
# Co-pilot did this:
def create_directory_report_json():
    """Write ``directory_report.json`` describing each category folder.

    For every category in ``category_mappings`` the report holds the paths
    found directly inside ``destination_dir/<category>`` and how many there
    are. Only the mapped categories are listed, so the ``unknown`` folder is
    not part of the report. The file is written to the current working
    directory.
    """
    def _describe(folder):
        # One report entry: the folder's direct children and their count.
        entries = [str(entry) for entry in folder.glob('*')]
        return {'total_files': len(entries), 'files': entries}

    directory_report = {
        name: _describe(destination_dir / name) for name in category_mappings
    }

    # Serialise first, then write the whole document in one go.
    Path('directory_report.json').write_text(json.dumps(directory_report, indent=2))
def main():
    """Sort the files, then write both JSON reports to the working directory."""
    # With debug set to True, copy_files skips the actual shutil.copy2 call.
    debug = False
    report = copy_files(debug)

    # Persist the copy summary returned by copy_files.
    serialized_report = json.dumps(report, indent=2)
    Path('category_report.json').write_text(serialized_report)

    # Then snapshot what ended up in each category folder.
    create_directory_report_json()
# Run the sorter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment