Skip to content

Instantly share code, notes, and snippets.

@IMBlues
Last active June 1, 2021 08:38
Show Gist options
  • Save IMBlues/c283bb900e53b2db329b94518e5ad31d to your computer and use it in GitHub Desktop.
Save IMBlues/c283bb900e53b2db329b94518e5ad31d to your computer and use it in GitHub Desktop.
Delete files without any fd open
# -*- coding: utf-8 -*-
import asyncio
import datetime
import getopt
import glob
import logging
import os
import sys
from collections import namedtuple
from pathlib import Path
from typing import Generator, Tuple
# Send every record from DEBUG upwards to the root logger's default handler.
logging.basicConfig(level=logging.DEBUG)

# The first three columns of one ``lsof`` output row, kept as strings.
Process = namedtuple("Process", ["command", "pid", "user"])

# Running totals, updated by try_to_delete_file() and printed when the
# script finishes.
skipped_count = 0
deleted_count = 0
def filter_files_by_expire_minutes(
    file_regex: str, minutes: int
) -> Generator[Tuple[Path, datetime.datetime], None, None]:
    """Yield every regular file matching a glob pattern that has expired.

    A file is expired when its modification time is more than ``minutes``
    minutes before the moment iteration starts.

    Args:
        file_regex: Despite the name this is a *glob* pattern (it is handed
            to ``glob.glob`` with ``recursive=True``, so ``**`` is supported),
            not a regular expression.
        minutes: Minimum age, in minutes, for a file to be reported.

    Yields:
        ``(path, modified)`` tuples, where ``modified`` is the file's mtime as
        a naive local ``datetime``.

    Entries that are not regular files (directories, dangling symlinks) and
    entries that disappear or cannot be stat'ed while scanning are skipped
    instead of aborting the scan.
    """
    # The cutoff does not change during the scan, so compute it once.
    deadline = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
    for name in glob.glob(file_regex, recursive=True):
        if not name:
            continue
        file_obj = Path(name)
        try:
            # Directories cannot be unlinked and dangling symlinks cannot be
            # stat'ed, so neither is a deletion candidate.
            if not file_obj.is_file():
                continue
            mtime = file_obj.stat().st_mtime
        except OSError as exc:
            # The file vanished (or became unreadable) after globbing.
            logging.debug("ignoring %s, cannot stat: %s", file_obj, exc)
            continue
        updated = datetime.datetime.fromtimestamp(mtime)
        if updated < deadline:
            yield file_obj, updated
async def try_to_delete_file(p: Path) -> None:
    """Delete ``p`` unless ``lsof`` lists a process other than this one for it.

    Runs ``lsof <p>`` while holding the module-level ``semaphore`` and
    inspects its standard output:

    * no output at all: the file is deleted;
    * exactly one process row whose PID is this script's own PID: deleted;
    * anything else: the file is left alone.

    The module-level ``deleted_count`` / ``skipped_count`` counters are
    updated accordingly. A file whose ``unlink`` fails is logged and counted
    as skipped rather than aborting the other pending deletions.

    Args:
        p: Path of the file to check and possibly delete.
    """
    global skipped_count, deleted_count

    async with semaphore:
        # The running loop is picked up implicitly; the explicit ``loop=``
        # keyword was removed from asyncio's subprocess API in Python 3.10.
        proc = await asyncio.create_subprocess_exec(
            "lsof", str(p), stdout=asyncio.subprocess.PIPE
        )
        # communicate() reads stdout to EOF and also waits for lsof to exit,
        # so the child process is reaped instead of lingering as a zombie.
        data, _ = await proc.communicate()

        # Command and file names may contain non-ASCII bytes; never let the
        # decoding step itself raise.
        text = data.decode("utf-8", errors="replace")
        rows = [row for row in text.splitlines() if row]

        # The first row is skipped as lsof's column header. Only the first
        # three columns are kept; short rows are padded with "?" so a
        # malformed line counts as an unknown holder instead of crashing.
        processes = [
            Process(*(row.split()[:3] + ["?"] * 3)[:3]) for row in rows[1:]
        ]
        only_self = len(processes) == 1 and processes[0].pid == str(os.getpid())

        if rows and not only_self:
            skipped_count += 1
            holders = ",".join(f"{x.command}:{x.pid}" for x in processes)
            logging.info(f"skipping {p}, for other processes: {holders}")
            return

        logging.info(f"deleting {p}")
        try:
            p.unlink()
        except OSError as exc:
            skipped_count += 1
            logging.warning("failed to delete %s: %s", p, exc)
            return
        # Count the deletion only once it has actually happened.
        deleted_count += 1
async def try_to_delete_files(files: Generator[Tuple[Path, datetime.datetime], None, None]):
    """Create one deletion attempt per candidate file and await them all.

    ``files`` yields ``(path, modified)`` pairs; every path is handed to
    ``try_to_delete_file`` and the resulting coroutines are run together.
    """
    pending = []
    for candidate, modified_at in files:
        logging.debug("adding %s, %s", candidate, modified_at)
        pending.append(try_to_delete_file(candidate))

    logging.debug("%s tasks added \n>>>>>>>>>>>>", len(pending))
    await asyncio.gather(*pending)
if __name__ == '__main__':
    # Command-line entry point:
    #   -f / --files    glob pattern of the files to consider (required)
    #   -m / --minutes  minimum age in minutes (default: one day)
    #   -h / --help     show usage and exit
    usage = "Usage: python delete_files.py -f /data/*/*.log* -m 1440"

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hf:m:", ["help", "files=", "minutes="])
    except getopt.GetoptError as exc:
        print(f"{exc}\n{usage}", file=sys.stderr)
        sys.exit(2)

    arg_files = ""
    arg_minutes = 60 * 24
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            # Asking for help is not an error.
            sys.exit(0)
        elif opt in ("-f", "--files"):
            arg_files = arg
        elif opt in ("-m", "--minutes"):
            try:
                arg_minutes = int(arg)
            except ValueError:
                print(f"invalid value for {opt}: {arg!r}\n{usage}", file=sys.stderr)
                sys.exit(2)

    if not arg_files:
        # Without a pattern there is nothing to scan; say so instead of
        # silently doing nothing.
        print(f"missing required option -f/--files\n{usage}", file=sys.stderr)
        sys.exit(2)

    started = datetime.datetime.now()
    print(f"Started: {started}\n>>>>>>>>>>>>")

    # python3.6 is default version on most machines, so asyncio.run() is not
    # used. Creating and installing the loop explicitly works on 3.6 and also
    # on recent versions, where get_event_loop() without a current loop is
    # deprecated or an error.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # too many io request may cause: BlockingIOError: [Errno 11] Resource temporarily unavailable
    # Created after set_event_loop() so that, on Python < 3.10, the semaphore
    # binds to the loop that actually runs the tasks.
    semaphore = asyncio.Semaphore(20)
    try:
        loop.run_until_complete(
            try_to_delete_files(filter_files_by_expire_minutes(arg_files, arg_minutes))
        )
    finally:
        loop.close()

    finished = datetime.datetime.now()
    print(f">>>>>>>>>>>>\nFinished: {finished}, total cost: {(finished - started).total_seconds()}seconds \n")
    print(f"Skipped: {skipped_count}, deleted: {deleted_count}")
@IMBlues
Copy link
Author

IMBlues commented Jun 1, 2021

Example

python delete_files.py -f */*/*.log -m 720

All files that match the pattern and are older than the given number of minutes will be deleted, unless another process has them open. This is especially useful for cleaning up expired logs.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment