# Source: GitHub Gist page (site navigation text omitted).
# Title: Skip processed entities
"""
Off-the-shelf way to skip already processed entities.
This script processes files, remembers which files have already been
processed, and skips them on the next run.
"""
import glob
import os
import shelve
def create_files(file_list):
    """Write a small placeholder file for every path in *file_list*.

    Each file is (re)created in text mode and contains the text
    ``In <basename>``, where ``<basename>`` is the final component of
    the path. Nothing is returned.
    """
    for target in file_list:
        with open(target, mode="wt") as handle:
            text = f"In {os.path.basename(target)}"
            handle.write(text)
def process_file(x):
    """Read the text file at *x* and print its lines as a list.

    Line endings are kept, so the output is the ``repr`` of the list
    of lines exactly as they are stored in the file.
    """
    with open(x, "rt") as source:
        lines = list(source)
    print(lines)
def list_files(path):
    """List the regular files in *path* whose name starts with ``file``.

    Returns a sorted list of paths, each built by joining *path* with
    the matching file name. A directory that does not exist yields an
    empty list.
    """
    # Escape the directory part so glob metacharacters in it ("[", "*",
    # "?") are matched literally; only the trailing "file*" is a pattern.
    # os.fspath keeps path-like objects working, as os.path.join did.
    pattern = os.path.join(glob.escape(os.fspath(path)), "file*")
    # Directories named "file..." also match the pattern, but they can be
    # neither opened for reading nor unlinked, so they are left out.
    return sorted(
        entry for entry in glob.glob(pattern) if os.path.isfile(entry)
    )
# noinspection PyBroadException
def run_process(files):
    """
    Process every file in *files* that was not processed in a previous run.

    The base name of each file is used as the key in the module-level
    ``shelf``. A file whose key is already stored is skipped; otherwise
    it is passed to ``process_file`` and then recorded in the shelf with
    the value ``"OK"``. A failure on one file is reported and does not
    stop the remaining files.

    Returns a message stating how many files were actually processed in
    this call; skipped and failed files are not counted.
    """
    processed = 0
    for obj in files:
        try:
            key = os.path.basename(obj)
            if key in shelf:
                print(f"Skipping {obj}")
                continue
            process_file(x=obj)
            # Record the file only after processing succeeded, so a
            # failed file is retried on the next run.
            shelf[key] = "OK"
            processed += 1
        except Exception as e:
            # Deliberate best effort: report the problem and carry on.
            print(f"Failed ({e})")
    return f"Processed {processed} files."
# This is a file where memories of processed files go.
CACHE_FILE = "processed.cache"
# The shelf has to be a module-level name: run_process() reads it as a
# global.
shelf = shelve.open(filename=CACHE_FILE)
try:
    # First run: nothing is in the shelf yet, so every file is processed.
    create_files(file_list=["file1", "file2", "file3"])
    available_files = list_files(path=".")
    run_process(files=available_files)
    # Second run: only the two new files should be processed, the first
    # three are expected to be skipped.
    create_files(file_list=["file4", "file5"])
    available_files = list_files(path=".")
    run_process(files=available_files)
finally:
    # Close the shelf even when one of the steps above failed.
    shelf.close()
# Remove any files that may have been created during the execution of
# this script.
for file in available_files:
    os.unlink(file)
# Depending on the dbm backend, shelve may add an extension to the file
# name and create more than one file, so remove everything that starts
# with the cache name instead of assuming a single file.
for cache_part in glob.glob(glob.escape(CACHE_FILE) + "*"):
    os.unlink(cache_part)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment