Created
January 10, 2024 01:25
-
-
Save aaw3/10a361123e03fd5a8b605801306f9929 to your computer and use it in GitHub Desktop.
Processes Windows File History Backup by getting latest file and deleting the older ones
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# Solution inspired by: https://gist.github.com/kmorcinek/2710267 | |
# This script parses the date between the parentheses to find the newest version of each file
# It will delete the older files and search all sub directories | |
# This definitely isn't the most efficient, but can process tens of thousands of files in less than a second | |
import re | |
from os.path import join | |
import os.path | |
from os import walk, rename, remove | |
import os | |
import sys | |
from datetime import datetime | |
FOLDER_PATH = os.getcwd()
# Full paths of every timestamped file that was examined (kept or removed).
processed = []

# Matches the " (YYYY_MM_DD HH_MM_SS UTC)" marker embedded in backup file
# names. The pattern is specific to the timestamp so that other parenthesised
# text in a name, e.g. "notes (draft)", is left alone.
STAMP_PATTERN = re.compile(r' \((\d{4}_\d{2}_\d{2} \d{2}_\d{2}_\d{2} UTC)\)')
STAMP_FORMAT = "%Y_%m_%d %H_%M_%S %Z"


def parse_stamp(name):
    """Return the timestamp embedded in *name* as a datetime, or None.

    None is returned both when the name carries no timestamp marker and when
    the marker has the right shape but is not a valid date (e.g. month 13).
    """
    match = STAMP_PATTERN.search(name)
    if match is None:
        return None
    try:
        return datetime.strptime(match.group(1), STAMP_FORMAT)
    except ValueError:
        return None


def strip_stamp(name):
    """Return *name* with its first timestamp marker removed."""
    return STAMP_PATTERN.sub('', name, count=1)


def process_directory(path, files):
    """Keep only the newest timestamped version of each file in one directory.

    Files are grouped by their name with the timestamp stripped. In every
    group the older versions are deleted and the newest one is renamed to the
    stripped name. Files without a valid timestamp are reported and skipped.

    Args:
        path: Directory that contains *files*.
        files: Bare file names found in *path*.

    Returns:
        List of full paths of the timestamped files that were handled.
    """
    # Single pass: bucket every timestamped file under its stripped name.
    groups = {}
    total = len(files)
    for index, name in enumerate(files, start=1):
        print('{} / {}'.format(index, total))
        stamp = parse_stamp(name)
        if stamp is None:
            print(f"Skipping {join(path, name)}")
            continue
        groups.setdefault(strip_stamp(name), []).append((stamp, name))

    handled = []
    for new_name, versions in groups.items():
        newest_file = max(versions, key=lambda version: version[0])[1]
        print("Newest file found:", newest_file)

        for _, name in versions:
            if name == newest_file:
                continue
            old_path = join(path, name)
            try:
                remove(old_path)
            except FileNotFoundError:
                # Already gone (e.g. removed by something else); nothing to do.
                pass
            except OSError as err:
                print(f"Could not remove {old_path}: {err}")
                continue
            else:
                print(f"Removed older duplicate: {old_path}")
            handled.append(old_path)

        source = join(path, newest_file)
        target = join(path, new_name)
        if os.path.lexists(target):
            # Never clobber an existing file: os.rename overwrites silently
            # on POSIX and raises on Windows, so handle it explicitly.
            print(f"Skipping rename, target already exists: {target}")
        else:
            rename(source, target)
            print(f"Renamed: {source} to {target}")
        handled.append(source)

    return handled


def process_folder(folder):
    """Process *folder* and all of its sub directories.

    Returns:
        List of full paths of every timestamped file that was handled.
    """
    handled = []
    for path, _subdirs, files in walk(folder):
        handled.extend(process_directory(path, files))
    return handled


def main():
    """Process the current working directory and print a summary."""
    processed.extend(process_folder(FOLDER_PATH))
    print(f"Processed {len(processed)} files.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment