Skip to content

Instantly share code, notes, and snippets.

@daneah
Last active September 21, 2019 13:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save daneah/ea1b79613a9234f33ee02b8ea4d5ec60 to your computer and use it in GitHub Desktop.
Save daneah/ea1b79613a9234f33ee02b8ea4d5ec60 to your computer and use it in GitHub Desktop.
Leo file processing cleaned up
"""
Recursively examine directory structure verifying that leo files
don't have a size of 0 bytes. Also, verify that the corresponding
xml files don't have a size of 0 bytes and have the same date/time
stamp.
Prerequisites
1) Python 3.6 or higher
Needed so that pathlib and f-strings can be used
"""
from datetime import datetime
import sys
from pathlib import Path
#
# Constants
#
DRIVE_LETTER_F_AND_ROOT = "F:/"

# Largest modification-time gap tolerated between a Leo file and its XML file.
MAX_TIME_DIFF_BETWEEN_FILES = 1.0  # in seconds

# Directory from which the recursive search for Leo files starts.
ROOT_DIRECTORY = Path(DRIVE_LETTER_F_AND_ROOT).joinpath(
    "organization",
    "ypl.com",
    "lucente_robert",
    "p",
    "py",
    "python_sw_lang",
    "official_doco",
    "std_library",
    "generic_os_services",
    "os_misc_os_interfaces",
    "files_dirs",
    "os_walk",
    "code",
    "example_1",
    "top_dir",
)
def print_error_notice(error_message):
    """Print an error message followed by the fixed error trailer.

    Four lines are written to stdout: the message itself, the text
    "This is an error condition", an empty line, and a "---" separator.

    Args:
        error_message: Text describing the problem that was detected.
    """
    trailer = ("This is an error condition", "", "---")
    print(error_message, *trailer, sep="\n")
def print_last_modified_time(file_title, timestamp):
    """Print a labelled last-modification timestamp, then an empty line.

    Args:
        file_title: Label placed at the start of the line (e.g. "Leo file").
        timestamp: datetime rendered as MM/DD/YYYY HH:MM:SS.
    """
    label = f"{file_title} last modification (Date Time): "
    rendered = timestamp.strftime("%m/%d/%Y %H:%M:%S")
    print(label, rendered)
    print()
def check_file(leo_file):
    """Validate one Leo file and its companion XML file, reporting problems on stdout.

    The checks run in this order and stop at the first failure:
      1. the Leo file is not empty;
      2. an XML file with the same name but a ".xml" suffix exists;
      3. that XML file is not empty;
      4. the two modification times differ by no more than
         MAX_TIME_DIFF_BETWEEN_FILES seconds, in either direction.

    Args:
        leo_file: pathlib.Path of the Leo file to examine.

    Returns:
        None. Findings are printed, not returned.

    Raises:
        OSError: propagated from Path.stat() when a file cannot be examined.
    """
    print("Leo File: ", leo_file)
    # A single stat() call serves both the size check and the mtime check.
    leo_stat = leo_file.stat()
    if leo_stat.st_size == 0:
        print_error_notice("Leo file size is not greater than zero")
        return

    xml_file = leo_file.with_suffix(".xml")
    print("XML File: ", xml_file)
    if not xml_file.exists():
        print_error_notice("XML file corresponding to Leo file does not exists")
        return

    xml_stat = xml_file.stat()
    if xml_stat.st_size == 0:
        print_error_notice("XML file size is not greater than zero")
        return

    leo_file_date_time = datetime.fromtimestamp(leo_stat.st_mtime)
    xml_file_date_time = datetime.fromtimestamp(xml_stat.st_mtime)
    # Use the absolute difference: a signed (xml - leo) value is negative when
    # the Leo file is the newer one, and would never exceed the threshold.
    time_diff_in_seconds = abs(
        (xml_file_date_time - leo_file_date_time).total_seconds()
    )
    if time_diff_in_seconds > MAX_TIME_DIFF_BETWEEN_FILES:
        print_last_modified_time("Leo file", leo_file_date_time)
        print_last_modified_time("XML file", xml_file_date_time)
        print_error_notice(
            f"Last modified times between files differ by more than 1 second: {time_diff_in_seconds} seconds"
        )
        return
def print_metrics(start_time, end_time):
    """Print the start time, end time and elapsed processing time.

    The elapsed time is reported three times: in seconds, in minutes
    and in hours.

    Args:
        start_time: datetime at which processing began.
        end_time: datetime at which processing finished.
    """
    stamp_format = "%m/%d/%Y %H:%M:%S"
    print("Start Time: ", start_time.strftime(stamp_format))
    print("End Time: ", end_time.strftime(stamp_format))

    elapsed_seconds = (end_time - start_time).total_seconds()
    print(f"Processing time in seconds: {elapsed_seconds}")
    for unit_name, seconds_per_unit in (("minutes", 60), ("hours", 3600)):
        print(f"Processing time in {unit_name}: {elapsed_seconds / seconds_per_unit}")
def main():
    """
    Main function that orchestrates all activity.

    Verifies that ROOT_DIRECTORY is a directory, runs check_file on every
    "*.leo" path found beneath it (recursively), and finally prints the
    start time, end time and elapsed time of the run.

    Raises:
        SystemExit: with an explanatory message when ROOT_DIRECTORY is not
            an existing directory.
    """
    # is_dir() rather than exists(): a regular file at this path is just as
    # invalid a starting point for the search as a missing path.
    if not ROOT_DIRECTORY.is_dir():
        sys.exit(f"Invalid directory for starting search: {ROOT_DIRECTORY}")

    start_time = datetime.now()
    for leo_file in ROOT_DIRECTORY.glob("**/*.leo"):
        check_file(leo_file)
    end_time = datetime.now()

    print_metrics(start_time, end_time)
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment