Last active
June 1, 2021 08:38
-
-
Save IMBlues/c283bb900e53b2db329b94518e5ad31d to your computer and use it in GitHub Desktop.
Delete files that no process currently holds open (no open file descriptors)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import asyncio | |
import datetime | |
import getopt | |
import glob | |
import logging | |
import os | |
import sys | |
from collections import namedtuple | |
from pathlib import Path | |
from typing import Generator, Tuple | |
# Send everything from DEBUG upwards through the root logger.
logging.basicConfig(level=logging.DEBUG)

# One row of `lsof` output, reduced to its first three columns.
Process = namedtuple("Process", ["command", "pid", "user"])

# Module-wide tallies, updated by try_to_delete_file.
skipped_count = 0
deleted_count = 0
def filter_files_by_expire_minutes(
    file_regex: str, minutes: int
) -> Generator[Tuple[Path, datetime.datetime], None, None]:
    """Yield every matching file last modified more than *minutes* ago.

    Args:
        file_regex: A ``glob`` pattern (not a regular expression, despite the
            name); ``**`` matches recursively.
        minutes: Minimum age, in minutes, for a file to count as expired.

    Yields:
        ``(path, modified_at)`` pairs, where ``modified_at`` is the file's
        mtime converted to a naive local ``datetime``.
    """
    # The cutoff does not change between files, so compute it once.
    cutoff = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
    for name in glob.glob(file_regex, recursive=True):
        if not name:
            continue
        file_obj = Path(name)
        try:
            # Directories can match the pattern too, but they cannot be
            # removed with unlink(), so never report them as expired files.
            if file_obj.is_dir():
                continue
            updated = datetime.datetime.fromtimestamp(file_obj.stat().st_mtime)
        except OSError:
            # Dangling symlink, or the file vanished after glob listed it.
            continue
        if updated < cutoff:
            yield file_obj, updated
async def try_to_delete_file(p: Path):
    """Delete *p* unless ``lsof`` reports that another process has it open.

    Runs ``lsof <p>`` while holding the module-level ``semaphore`` and parses
    its output. The file is unlinked when ``lsof`` prints nothing, or when the
    only listed process is this one; otherwise it is left alone. The
    module-level ``deleted_count`` / ``skipped_count`` are updated to match.

    Args:
        p: Path of the file to try to delete.

    Raises:
        OSError: If the ``lsof`` executable cannot be started. This is left
            to propagate on purpose: without ``lsof`` there is no way to tell
            whether deleting is safe.
    """
    global skipped_count, deleted_count

    async with semaphore:
        proc = await asyncio.create_subprocess_exec(
            "lsof", str(p), stdout=asyncio.subprocess.PIPE
        )
        # communicate() reads all of stdout AND waits for the child to exit,
        # so no unreaped process is left behind.
        data, _ = await proc.communicate()

    # Command names are not guaranteed to be ASCII; never fail on decoding.
    text = data.decode("utf-8", errors="replace")
    lines = [x for x in text.splitlines() if x.strip()]

    # The first line is the lsof header; each remaining row describes one
    # open handle. Short rows are padded so a malformed row still counts as
    # a holder instead of raising.
    processes = [
        Process(*(row.split() + ["?", "?", "?"])[:3]) for row in lines[1:]
    ]

    # Compare as strings so an unparsable pid column cannot raise.
    only_self = len(processes) == 1 and processes[0].pid == str(os.getpid())
    if not lines or only_self:
        logging.info(f"deleting {p}")
        try:
            p.unlink()
        except OSError as exc:
            # E.g. the file disappeared or is not removable; keep going with
            # the remaining files instead of aborting the whole run.
            skipped_count += 1
            logging.warning("failed to delete %s: %s", p, exc)
            return
        # Only count the file once it is really gone.
        deleted_count += 1
        return

    skipped_count += 1
    logging.info(f"skipping {p}, for other processes: {','.join([f'{x.command}:{x.pid}' for x in processes])}")
async def try_to_delete_files(files: Generator[Tuple[Path, datetime.datetime], None, None]):
    """Attempt to delete every file in *files* concurrently.

    Args:
        files: Iterable of ``(path, modified_at)`` pairs; ``modified_at`` is
            only used for debug logging.

    One ``try_to_delete_file`` coroutine is created per path and all of them
    are awaited together with ``asyncio.gather``.
    """
    tasks = []
    for path, updated in files:
        logging.debug("adding %s, %s", path, updated)
        tasks.append(try_to_delete_file(path))
    logging.debug("%s tasks added \n>>>>>>>>>>>>", len(tasks))
    await asyncio.gather(*tasks)
if __name__ == '__main__':
    # Command-line entry point: parse -f/--files and -m/--minutes, then try
    # to delete every expired file that matches.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hf:m:", ["files=", "minutes="])
    except getopt.GetoptError:
        print("delete_files.py -f /data/*/*.log* -m 1440")
        sys.exit(2)

    arg_files = ""
    arg_minutes = 60 * 24  # default: files older than one day
    for opt, arg in opts:
        if opt == '-h':
            print("Usage: python delete_files.py -f /data/*/*.log* -m 1440")
            # Asking for help is not an error.
            sys.exit(0)
        elif opt in ("-f", "--files"):
            arg_files = arg
        elif opt in ("-m", "--minutes"):
            try:
                arg_minutes = int(arg)
            except ValueError:
                print(f"invalid value for minutes: {arg!r}")
                print("Usage: python delete_files.py -f /data/*/*.log* -m 1440")
                sys.exit(2)

    if not arg_files:
        # Without a pattern nothing can match; say so instead of silently
        # doing nothing.
        print("Usage: python delete_files.py -f /data/*/*.log* -m 1440")
        sys.exit(2)

    started = datetime.datetime.now()
    print(f"Started: {started}\n>>>>>>>>>>>>")

    # python3.6 is default version on most machines, so asyncio.run() is not
    # used. Create the loop explicitly: asyncio.get_event_loop() without a
    # current loop is deprecated and fails on recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # too many io request may cause: BlockingIOError: [Errno 11] Resource temporarily unavailable
    semaphore = asyncio.Semaphore(20)

    try:
        loop.run_until_complete(try_to_delete_files(filter_files_by_expire_minutes(arg_files, arg_minutes)))
    finally:
        loop.close()

    finished = datetime.datetime.now()
    print(f">>>>>>>>>>>>\nFinished: {finished}, total cost: {(finished - started).total_seconds()}seconds \n")
    print(f"Skipped: {skipped_count}, deleted: {deleted_count}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example
All matched files will be deleted unless another process has them open, which is especially useful when cleaning up expired logs.