Skip to content

Instantly share code, notes, and snippets.

@aaw3
Created January 10, 2024 01:25
Show Gist options
  • Save aaw3/10a361123e03fd5a8b605801306f9929 to your computer and use it in GitHub Desktop.
Save aaw3/10a361123e03fd5a8b605801306f9929 to your computer and use it in GitHub Desktop.
Processes Windows File History Backup by getting latest file and deleting the older ones
#!/usr/bin/python3
# Solution inspired by: https://gist.github.com/kmorcinek/2710267
# This script parses the date between the parentheses and finds the newest date
# It will delete the older files and search all sub directories
# This definitely isn't the most efficient, but can process tens of thousands of files in less than a second
import re
from os.path import join
import os.path
from os import walk, rename, remove
import os
import sys
from datetime import datetime
FOLDER_PATH = os.getcwd()

# Timestamp suffix found in backup file names, e.g. " (2024_01_09 01_25_00 UTC)".
# The leading space is optional so names without one are still recognised.
TIMESTAMP_PATTERN = re.compile(r' ?\((\d{4}_\d{2}_\d{2} \d{2}_\d{2}_\d{2} UTC)\)')
TIMESTAMP_FORMAT = "%Y_%m_%d %H_%M_%S %Z"


def parse_backup_name(name):
    """Split a timestamped backup file name into its parts.

    Args:
        name: Bare file name (no directory component).

    Returns:
        A ``(original_name, timestamp)`` tuple, where ``original_name`` is
        ``name`` with the first timestamp suffix removed and ``timestamp`` is
        the parsed ``datetime``; or ``None`` when ``name`` carries no valid
        timestamp.
    """
    match = TIMESTAMP_PATTERN.search(name)
    if match is None:
        return None
    try:
        stamp = datetime.strptime(match.group(1), TIMESTAMP_FORMAT)
    except ValueError:
        # Right shape but not a real date (e.g. month 13): treat as undated.
        return None
    # Remove only the matched timestamp so other parentheses in the name
    # (e.g. "notes (draft)") are preserved.
    original_name = name[:match.start()] + name[match.end():]
    return original_name, stamp


def process_directory(folder_path):
    """Keep only the newest timestamped copy of every file under a folder.

    Walks ``folder_path`` recursively. Within each directory, files that share
    the same name once the timestamp is stripped are treated as versions of
    one file: all but the newest are deleted and the newest is renamed to the
    stripped name. Files without a timestamp are left untouched.

    Args:
        folder_path: Root directory to process.

    Returns:
        List of the original paths of every file that was removed or renamed.

    Raises:
        OSError: If a file cannot be removed or renamed.
    """
    processed = []
    for path, _subdirs, files in walk(folder_path):
        # Group every timestamped file by its stripped name in a single pass,
        # instead of rescanning the directory listing for each file.
        groups = {}
        for i, name in enumerate(files):
            print(f"{i + 1} / {len(files)}")
            parsed = parse_backup_name(name)
            if parsed is None:
                print(f"Skipping {join(path, name)}")
                continue
            original_name, stamp = parsed
            groups.setdefault(original_name, []).append((stamp, name))

        for original_name, versions in groups.items():
            _newest_stamp, newest_file = max(versions)
            print("Newest file found:", newest_file)

            for _stamp, name in versions:
                if name == newest_file:
                    continue
                old_path = join(path, name)
                remove(old_path)
                print(f"Removed older duplicate: {old_path}")
                processed.append(old_path)

            newest_path = join(path, newest_file)
            target_path = join(path, original_name)
            if os.path.exists(target_path):
                # rename() would overwrite silently on POSIX and raise on
                # Windows; never clobber a file this script did not create.
                print(f"Skipping rename of {newest_path}: {target_path} already exists")
                continue
            rename(newest_path, target_path)
            print(f"Renamed: {newest_path} to {target_path}")
            processed.append(newest_path)
    return processed


def main():
    """Process the current working directory and report the number of files handled."""
    processed = process_directory(FOLDER_PATH)
    print(f"Processed {len(processed)} files.")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment