Last active
October 7, 2019 18:04
-
-
Save jgarciabu/3dcdca7505ff308cc62329064c6ec5b1 to your computer and use it in GitHub Desktop.
Script created to monitor an incoming file directory and move files, from oldest to newest, to another directory to be processed by another data flow process. The key to this script is to limit the file moves to 200MB so that the data flow process doesn't choke when RAM runs out. Additionally, if a single file exceeds 200 MB, then an email goes …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 25 10:16:33 2019
@author: jeffrey.garcia
"""
import os
import shutil
import smtplib
# Upper bound, in bytes, on the combined size of the files moved to the
# destination folder in one run (200 MB). A single file larger than this is
# sent to the reject folder instead.
maxSize = 200000000
# Bytes accounted for so far during the current run.
runningTotal = 0
# NOTE: XXX is a redacted placeholder, not a real value. Replace each one with
# a directory path string before running the script. Keeping a trailing path
# separator on the two folder paths is the safe choice, since file names are
# appended to them when files are moved.
sourceFiles = XXX  # directory that incoming files arrive in
destinationFolder = XXX  # directory the downstream data flow picks files up from
rejectFolder = XXX  # directory that receives files larger than maxSize
def sendemail(from_addr, to_addr_list, cc_addr_list,
              subject, message,
              login, password,
              smtpserver='smtp.gmail.com:587'):
    """Send a plain-text email through an authenticated STARTTLS SMTP session.

    Args:
        from_addr: Sender address, used for the From header and the envelope.
        to_addr_list: List of primary recipient addresses.
        cc_addr_list: List of carbon-copy recipient addresses.
        subject: Subject line.
        message: Body text of the email.
        login: User name for SMTP authentication.
        password: Password for SMTP authentication.
        smtpserver: SMTP server as a ``host:port`` string.

    Raises:
        smtplib.SMTPException: If the server rejects the TLS upgrade, the
            login, or the message.
        OSError: If the connection to the server cannot be established.
    """
    # Callers may pass placeholder empty strings (e.g. [""]); an empty
    # address is neither a valid header entry nor a valid envelope recipient.
    to_addrs = [addr for addr in to_addr_list if addr]
    cc_addrs = [addr for addr in cc_addr_list if addr]

    header = 'From: %s\n' % from_addr
    header += 'To: %s\n' % ','.join(to_addrs)
    if cc_addrs:
        # A Cc header with no addresses is malformed, so only emit it when
        # there is at least one address to list.
        header += 'Cc: %s\n' % ','.join(cc_addrs)
    header += 'Subject: %s\n\n' % subject
    message = header + message

    # The context manager issues QUIT and closes the socket even when
    # starttls/login/sendmail raises, so the connection is never leaked.
    with smtplib.SMTP(smtpserver) as server:
        server.starttls()
        server.login(login, password)
        # Headers are informational only: the envelope recipient list decides
        # who actually receives the mail, so the Cc addresses must be in it.
        server.sendmail(from_addr, to_addrs + cc_addrs, message)
def get_filepaths(directory):
    """Return the full paths of the files located directly inside *directory*.

    Only the top level is inspected; files in subdirectories are not
    included. Each returned path is *directory* joined with the file name.

    Args:
        directory: Path of the directory to list.

    Returns:
        A list of file paths, in the order ``os.walk`` reports them. The list
        is empty when the directory holds no files or cannot be listed
        (``os.walk`` ignores listing errors by default).
    """
    for root, _directories, files in os.walk(directory):
        # os.walk yields the top directory first; returning here stops the
        # walk before it descends into any subdirectory.
        return [os.path.join(root, filename) for filename in files]
    # os.walk produced nothing, e.g. because the directory does not exist.
    return []
def move_pending_files(source_dir, destination_dir, reject_dir, max_bytes):
    """Move the oldest incoming files to the destination folder, up to a size cap.

    Nothing happens unless ``destination_dir`` exists and has no files
    directly inside it, i.e. the previous batch has already been consumed.
    Files directly inside ``source_dir`` are then handled oldest first
    (ordered by ``os.path.getctime``):

    * a file larger than ``max_bytes`` on its own is reported by email and
      moved to ``reject_dir``;
    * a file that still fits in the remaining budget is moved to
      ``destination_dir``;
    * the first file that does not fit ends the run, so files are never
      delivered out of order.

    Args:
        source_dir: Directory that incoming files arrive in.
        destination_dir: Directory the files are moved to.
        reject_dir: Directory that receives oversized files.
        max_bytes: Maximum combined size, in bytes, moved in one call.

    Returns:
        The number of bytes moved into ``destination_dir``.
    """
    # Only the top level of the destination matters: the first tuple yielded
    # by os.walk describes it, and no tuple at all means it does not exist.
    top_level = next(os.walk(destination_dir), None)
    if top_level is None or top_level[2]:
        return 0

    moved_bytes = 0
    for path in sorted(get_filepaths(source_dir), key=os.path.getctime):
        if moved_bytes >= max_bytes:
            # Budget used up exactly; leave everything else for the next run.
            break

        size = os.path.getsize(path)
        filename = os.path.basename(path)

        if size > max_bytes:
            # This file can never fit, whatever the remaining budget is.
            # The notification goes out before the move, so a failed email
            # leaves the file in place to be retried on the next run.
            sendemail(from_addr="XXXX",
                      to_addr_list=["XXXX"],
                      cc_addr_list=[],
                      subject="EDI File Rejected: Exceeded Max Size",
                      message="A file larger than the EDI server limit has arrived and has been sent to the rejected folder: " + reject_dir + "\n\n ================= File size: " + str(round(size / 1000000)) + " MB\n\n ================= File name: " + filename,
                      login="XXXX",
                      password="XXX")
            shutil.move(path, os.path.join(reject_dir, filename))
            continue

        if moved_bytes + size > max_bytes:
            # Stop here rather than skip ahead to a smaller, newer file.
            break

        # os.path.join gives the right target whether or not the folder path
        # ends with a separator.
        shutil.move(path, os.path.join(destination_dir, filename))
        moved_bytes += size

    return moved_bytes


if __name__ == "__main__":
    runningTotal = move_pending_files(sourceFiles, destinationFolder,
                                      rejectFolder, maxSize)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment