Last active
April 9, 2021 07:23
-
-
Save ashdtu/7c59822c225e454136a2165fc03c30d5 to your computer and use it in GitHub Desktop.
Save your files from being automatically deleted from scratch/share directory.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import time | |
from tqdm import tqdm | |
import argparse | |
import sys | |
SECONDS_PER_DAY = 24 * 60 * 60
# Files are read in bounded chunks so a huge scratch file never has to fit in memory.
READ_CHUNK_BYTES = 1024 * 1024


def days_since_access(last_access, now):
    """Return the whole number of days elapsed between two epoch timestamps.

    Args:
        last_access: Last-access time of a file, in seconds since the epoch.
        now: Reference "current" time, in seconds since the epoch.

    Returns:
        Elapsed full days as an int. A last-access time later than ``now``
        (e.g. clock skew) yields 0 rather than a negative number.
    """
    # Comparing absolute timestamps (instead of day-of-month numbers) keeps
    # the result correct across month and year boundaries.
    return max(0, int((now - last_access) // SECONDS_PER_DAY))


def read_file_in_chunks(file_path):
    """Read ``file_path`` from start to end, discarding the data.

    The content is not used; the read itself is the purpose of the call.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "rb") as handle:
        while handle.read(READ_CHUNK_BYTES):
            pass


def refresh_stale_files(path, days, now=None, progress=None):
    """Read every file under ``path`` last accessed at least ``days`` days ago.

    Args:
        path: Root directory to walk recursively.
        days: Minimum age (in whole days since last access) for a file to be read.
        now: Reference time in seconds since the epoch; defaults to ``time.time()``.
        progress: Optional callable invoked as ``progress(1)`` once per file
            examined, whether or not the file was read.

    Returns:
        Number of files that were read.
    """
    if now is None:
        now = time.time()
    read_count = 0
    for root, _dirs, files in os.walk(path):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            try:
                if days_since_access(os.path.getatime(file_path), now) >= days:
                    read_file_in_chunks(file_path)
                    read_count += 1
            except OSError as err:
                # Best effort: a vanished or unreadable file must not abort the run.
                print("WARNING: Skipping {}: {}".format(file_path, err), file=sys.stderr)
            if progress is not None:
                progress(1)
    return read_count


def main(argv=None):
    """Parse command-line arguments and read stale files under the given path.

    Args:
        argv: Argument list to parse; ``None`` means use ``sys.argv[1:]``.

    Exits with status 1 (after printing help to stderr) when no ``--path`` is given.
    """
    parser = argparse.ArgumentParser(description="Script to read files from directory to prevent automatic deletion")
    parser.add_argument('--path', default=None, help="Enter directory path of your scratch or share space. Eg: /scratch/dave/ ")
    parser.add_argument('--days', default=5, type=int, help="If last access day of file is beyond this, read the file to prevent deletion(default:5)")
    args = parser.parse_args(argv)

    # Checking the parsed value (not just the argument count) also covers
    # invocations such as `--days 3` with no path.
    if args.path is None:
        parser.print_help(sys.stderr)
        sys.exit(1)

    file_count = sum(len(files) for _, _, files in os.walk(args.path))
    print("INFO: Total files in given path : {}".format(file_count))
    print("INFO: Reading files....")

    with tqdm(total=file_count) as pbar:
        read_count = refresh_stale_files(args.path, args.days, progress=pbar.update)

    print("INFO: Files read : {}".format(read_count))
    print("INFO: Done!")


if __name__ == "__main__":
    main()
You can run this bash command instead. For example, if the target path is /scratch/dave and the threshold is 14 days:
find /scratch/dave -type f -atime +14 -exec file {} > /dev/null \;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Prevent automatic deletion of files from the /scratch or /share directory space (which usually happens every 10 days) on HPC systems.
Usage :
python save_my_scratch.py --path /scratch/(your username)