Skip to content

Instantly share code, notes, and snippets.

@ashdtu
Last active April 9, 2021 07:23
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ashdtu/7c59822c225e454136a2165fc03c30d5 to your computer and use it in GitHub Desktop.
Save ashdtu/7c59822c225e454136a2165fc03c30d5 to your computer and use it in GitHub Desktop.
Save your files from being automatically deleted from a scratch/share directory.
import os
import time
from tqdm import tqdm
import argparse
import sys
# Number of seconds in one day, used to convert atime differences to days.
SECONDS_PER_DAY = 24 * 60 * 60

# Files are read in pieces of this size so that very large files never have
# to fit in memory at once.
READ_CHUNK_BYTES = 1024 * 1024


def _read_through(path):
    """Read the file at *path* from start to end, discarding the data.

    The read itself is the point: it is what asks the filesystem to refresh
    the file's last-access time.
    NOTE(review): on filesystems mounted with ``noatime`` a read may not
    update the access time at all -- confirm for the target system.
    """
    with open(path, "rb") as handle:
        while handle.read(READ_CHUNK_BYTES):
            pass


def refresh_stale_files(top, days, on_file=None):
    """Read every file under *top* whose last access is at least *days* old.

    Age is measured as elapsed time between now and the file's access
    timestamp, so it stays correct across month and year boundaries
    (comparing only the day-of-month does not).

    Args:
        top: Directory to walk recursively.
        days: Minimum age, in days since last access, for a file to be read.
        on_file: Optional zero-argument callable invoked once per file
            visited (whether or not it was read), e.g. to advance a
            progress bar.

    Returns:
        The number of files that were read.
    """
    now = time.time()
    refreshed = 0
    for root, _dirs, files in os.walk(top):
        for file_name in files:
            full_path = os.path.join(root, file_name)
            try:
                age_days = (now - os.path.getatime(full_path)) / SECONDS_PER_DAY
                if age_days >= days:
                    _read_through(full_path)
                    refreshed += 1
            except OSError as err:
                # Files can vanish or be unreadable (permissions, broken
                # symlinks) mid-walk; skip them instead of aborting the run.
                print("WARNING: Skipping {}: {}".format(full_path, err), file=sys.stderr)
            if on_file is not None:
                on_file()
    return refreshed


def main():
    """Parse the command line and refresh stale files under ``--path``."""
    parser = argparse.ArgumentParser(description="Script to read files from directory to prevent automatic deletion")
    parser.add_argument('--path',default=None,help="Enter directory path of your scratch or share space. Eg: /scratch/dave/ ")
    parser.add_argument('--days',default=5,type=int,help="If last access day of file is beyond this, read the file to prevent deletion(default:5)")
    args = parser.parse_args()
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)
    if args.path is None:
        # Without this check os.walk(None) would fail with an opaque TypeError.
        parser.error("--path is required")

    # First pass only counts files so the progress bar has a total.
    file_count = sum(len(files) for _, _, files in os.walk(args.path))
    print("INFO: Total files in given path : {}".format(file_count))
    print("INFO: Reading files....")

    with tqdm(total=file_count) as pbar:
        count = refresh_stale_files(args.path, args.days, on_file=lambda: pbar.update(1))

    print("INFO: Files read : {}".format(count))
    print("INFO: Done!")


if __name__ == "__main__":
    main()
@ashdtu
Copy link
Author

ashdtu commented Sep 3, 2019

Prevents automatic deletion of files from the /scratch or /share directory space (which usually happens every 10 days) on HPC systems.

Usage :
python save_my_scratch.py --path /scratch/(your username)

@jyotishp
Copy link

jyotishp commented Apr 9, 2021

You can run this bash command instead. For example, if the target path is /scratch/dave and the threshold is 14 days:

find /scratch/dave -type f -atime +14 -exec file {} > /dev/null \;

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment