Skip to content

Instantly share code, notes, and snippets.

@IMBlues
Last active June 1, 2021 16:51
Show Gist options
  • Save IMBlues/2803c02420d01309d33f9d26ee501223 to your computer and use it in GitHub Desktop.
Save IMBlues/2803c02420d01309d33f9d26ee501223 to your computer and use it in GitHub Desktop.
Delete files that no process currently holds open (non-asyncio version)
# -*- coding: utf-8 -*-
import datetime
import getopt
import glob
import logging
import subprocess
import sys
from collections import namedtuple
from pathlib import Path
from typing import Generator, Dict
# Configure the root logger at DEBUG level so the logging.info() and
# logging.exception() calls made by this script are emitted.
logging.basicConfig(level=logging.DEBUG)
# One process holding a file open; all three fields are kept as strings.
Process = namedtuple("Process", "command,pid,user")


def get_processes_open(file_regex: str) -> Dict[str, Process]:
    """Find processes with open handles for the specified file(s).

    ``file_regex`` is a glob pattern (not a regular expression). It is expanded
    here with ``glob.glob(..., recursive=True)`` and the resulting paths are
    handed to ``lsof`` as an argument list, so the pattern is never
    interpreted by a shell.

    Args:
        file_regex: Glob pattern selecting the files to inspect.

    Returns:
        A mapping from file path to the ``Process`` that has it open. Keys use
        the same spelling a caller gets from ``str(Path(name))`` for each
        ``name`` produced by the same glob; a name reported by lsof that
        cannot be matched back is used as-is. When several processes hold the
        same file, the last one reported wins. The mapping is empty when
        nothing matches, nothing is open, or lsof cannot be started (the
        failure is logged).
    """
    open_file_process_map: Dict[str, Process] = {}

    matched = glob.glob(file_regex, recursive=True)
    if not matched:
        # Nothing to inspect; running lsof without names would list every
        # open file on the system.
        return open_file_process_map

    # lsof is expected to report resolved absolute paths, so remember which
    # caller-visible spellings each resolved path corresponds to.
    spellings = {}
    for name in matched:
        path = Path(name)
        try:
            resolved = str(path.resolve())
        except (OSError, RuntimeError):
            resolved = name
        spellings.setdefault(resolved, []).append(str(path))

    # Pass the paths in bounded batches to stay below the OS argument limit.
    batch_size = 500
    for start in range(0, len(matched), batch_size):
        batch = matched[start:start + batch_size]
        try:
            # -F selects machine-readable output: one field per line, each
            # prefixed by a one-letter tag (p=pid, c=command, u=uid,
            # L=login name, n=file name). "--" ends option parsing so a path
            # starting with "-" is not taken for an option. lsof exits
            # non-zero when some listed file is not open, so the return code
            # is deliberately not checked.
            result = subprocess.run(
                ["lsof", "-F", "pcuLn", "--", *batch],
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                universal_newlines=True,
                errors="replace",
            )
        except OSError:
            logging.exception("exec error")
            return open_file_process_map

        pid = command = uid = login = ""
        for line in result.stdout.split("\n"):
            if not line:
                continue
            tag, value = line[0], line[1:]
            if tag == "p":
                # A pid line starts the description of a new process.
                pid, command, uid, login = value, "", "", ""
            elif tag == "c":
                command = value
            elif tag == "u":
                uid = value
            elif tag == "L":
                login = value
            elif tag == "n":
                # Prefer the login name; fall back to the numeric uid when
                # lsof did not report one.
                holder = Process(command, pid, login or uid)
                for key in spellings.get(value, [value]):
                    open_file_process_map[key] = holder
    return open_file_process_map
def try_to_delete_files(file_regex: str, minutes: int, *, dry_run: bool = False) -> None:
    """Delete expired files that no process currently has open.

    A file is expired when its modification time is more than ``minutes``
    minutes in the past. Expired files listed by ``get_processes_open`` are
    skipped; the others are removed. A summary line is printed at the end.

    Args:
        file_regex: Glob pattern (expanded with ``recursive=True``) selecting
            candidate files.
        minutes: Minimum age, in minutes since last modification, for a file
            to be considered expired.
        dry_run: When true, log and count the files that would be deleted
            without removing them.
    """
    import shutil  # local import: only needed for the lsof availability check

    skipped_count = deleted_count = 0
    # Loop-invariant: files last modified before this instant are expired.
    cutoff = datetime.datetime.now() - datetime.timedelta(minutes=minutes)

    def filter_files_by_expire_minutes() -> Generator[Path, None, None]:
        """Yield every regular file matching the pattern that has expired."""
        for name in glob.glob(file_regex, recursive=True):
            file_obj = Path(name)
            try:
                # Directories matched by the glob cannot be unlinked.
                if not file_obj.is_file():
                    continue
                updated = datetime.datetime.fromtimestamp(file_obj.stat().st_mtime)
            except OSError:
                # The file may have disappeared since the glob was expanded.
                logging.warning("cannot stat %s, ignoring", file_obj)
                continue
            if updated < cutoff:
                yield file_obj

    # Without lsof there is no way to tell which files are in use, and an
    # empty "open files" map would make every expired file look deletable.
    if shutil.which("lsof") is None:
        logging.error("lsof not found on PATH; refusing to delete anything")
        print(f"Skipped: {skipped_count}, deleted: {deleted_count}")
        return

    open_file_process_map = get_processes_open(file_regex)
    for f in filter_files_by_expire_minutes():
        holder = open_file_process_map.get(str(f))
        if holder is not None:
            skipped_count += 1
            logging.info("skipping %s, for process: %s", f, holder)
            continue

        logging.info("deleting %s", f)
        if not dry_run:
            try:
                # NOTE: the open-file map is a snapshot taken before this
                # loop; a process may have opened the file since then.
                f.unlink()
            except OSError:
                logging.exception("failed to delete %s", f)
                skipped_count += 1
                continue
        deleted_count += 1

    print(f"Skipped: {skipped_count}, deleted: {deleted_count}")
def main(argv=None) -> int:
    """Parse command-line options and run the cleanup.

    Options:
        -h              Print usage and exit.
        -f, --files     Glob pattern of files to clean up (required). Quote it
                        so the shell passes the pattern through unexpanded.
        -m, --minutes   Minimum file age in minutes (default 1440).

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success or after ``-h``, 2 on a usage error.
    """
    # The pattern is quoted: left unquoted, the shell expands it and only the
    # first matching file would reach -f.
    usage = 'python delete_files.py -f "/data/*/*.log*" -m 1440'
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, args = getopt.getopt(argv, "hf:m:", ["files=", "minutes="])
    except getopt.GetoptError:
        print(f"Example: {usage}")
        return 2

    arg_files = ""
    arg_minutes = 1440
    for opt, arg in opts:
        if opt == "-h":
            print(f"Usage: {usage}")
            return 0
        if opt in ("-f", "--files"):
            arg_files = arg
        elif opt in ("-m", "--minutes"):
            try:
                arg_minutes = int(arg)
            except ValueError:
                print(f"Invalid value for minutes: {arg!r} (expected an integer)")
                print(f"Example: {usage}")
                return 2
            if arg_minutes < 0:
                # A negative age would put the cutoff in the future and make
                # every matching file count as expired.
                print(f"Invalid value for minutes: {arg!r} (must not be negative)")
                return 2

    if args:
        # Leftover positional arguments usually mean the glob was not quoted
        # and the shell expanded it.
        print(f"Unexpected arguments: {args}; quote the file pattern")
        print(f"Example: {usage}")
        return 2
    if not arg_files:
        print("Missing required option -f/--files")
        print(f"Example: {usage}")
        return 2

    started = datetime.datetime.now()
    print(f"Started: {started}\n>>>>>>>>>>>>")
    try_to_delete_files(arg_files, arg_minutes)
    finished = datetime.datetime.now()
    print(
        f">>>>>>>>>>>>\nFinished: {finished}, total cost: {(finished - started).total_seconds()} seconds \n"
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
@IMBlues
Copy link
Author

IMBlues commented Jun 1, 2021

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment