Skip to content

Instantly share code, notes, and snippets.

@guy-a
Created May 3, 2015 22:22
Show Gist options
  • Save guy-a/6bdc8f530f1b7f392699 to your computer and use it in GitHub Desktop.
Save guy-a/6bdc8f530f1b7f392699 to your computer and use it in GitHub Desktop.
A Python script for leeching (bulk-downloading) a folder from an FTP server. Run it with: python supervisor.py leacher.py
import os
import logging
import ftplib
from ftplib import FTP
class Leacher(object):
    """Download the files of one FTP folder into a local folder.

    Constructing an instance does the whole job: it configures logging to
    ``leacher.log``, connects, lists the remote folder and then downloads
    (and optionally deletes) every listed file.  The outcome is exposed
    through the ``all_count``, ``down_count`` and ``del_count`` counters.

    Args:
        host: FTP server host name.
        account: Login name.
        passwd: Login password.
        ftp_folder: Remote folder to change into; empty stays in the login
            folder.
        local_folder_path: Existing local folder that receives the files.
        delete_files: When true, every file is deleted from the server after
            it was downloaded, or straight away when it has zero bytes.
        file_match: Optional argument handed to the server's NLST command.
    """

    # Number of processed files after which the folder is listed again while
    # deleting; presumably the server caps one listing at this many entries.
    LISTING_LIMIT = 10000

    def __init__(self, host, account, passwd, ftp_folder='', local_folder_path='', delete_files=False, file_match=''):
        logging.basicConfig(filename='leacher.log', format='%(asctime)s - %(levelname)s: %(message)s', level=logging.DEBUG)
        self.host = host
        self.account = account
        self.passwd = passwd
        self.ftp_folder = ftp_folder
        self.local_folder_path = local_folder_path
        self.delete_files = self._as_flag(delete_files)
        self.file_match = file_match
        self.files_count = 0
        self.down_count = 0
        self.del_count = 0
        self.curr_count = 0
        self.all_count = 0
        self.ftp = None
        # Listing is pointless (and used to crash) when the login failed.
        if self.connect():
            self.get_files()

    @staticmethod
    def _as_flag(value):
        """Turn ``value`` into a real boolean.

        The default of ``delete_files`` used to be the *string* 'False',
        which is truthy; textual "false" spellings are mapped to False so
        callers that copied that default keep the meaning they intended.
        """
        if value in ('False', 'false', ''):
            return False
        return bool(value)

    def _close_connection(self):
        """Best-effort close of the current session so its socket is released."""
        stale = getattr(self, 'ftp', None)
        if stale is None:
            return
        try:
            stale.close()
        except ftplib.all_errors:
            # The link is usually dead already when a reconnect is needed.
            pass
        self.ftp = None

    def connect(self):
        """Open a session, log in and change into ``ftp_folder``.

        Returns:
            True when the session is ready, False when any step failed (the
            error is logged and ``self.ftp`` is left as None).
        """
        self._close_connection()
        try:
            self.ftp = FTP(self.host)
            self.log('\n\n')
            self.log('Connecting to %s' % self.host, True)
            self.ftp.login(self.account, self.passwd)
            # An empty folder name means "stay where the login put us".
            if self.ftp_folder:
                self.ftp.cwd(self.ftp_folder)
                self.log('Switched to folder %s' % self.ftp_folder, True)
        except ftplib.all_errors as ex:
            self.log('Cant connect, ex: %s' % ex, True, logging.ERROR)
            self._close_connection()
            return False
        return True

    def get_files(self):
        """List the remote folder and process every returned name."""
        if getattr(self, 'ftp', None) is None:
            self.log('Not connected, nothing to list', True, logging.ERROR)
            return
        try:
            if self.file_match:
                filenames = self.ftp.nlst(self.file_match)
            else:
                filenames = self.ftp.nlst()
        except ftplib.all_errors as ex:
            self.log('Cant list files, ex: %s' % ex, True, logging.ERROR)
            return
        self.files_count = len(filenames)
        self.log('Found %s files...' % self.files_count, True)
        self.all_count += self.files_count
        self.process_files(filenames)

    def process_files(self, filenames):
        """Download (and optionally delete) each name in ``filenames``.

        A permission error on one entry is logged and the loop moves on; any
        other FTP or I/O error triggers a reconnect and abandons the rest of
        this listing.
        """
        self.curr_count = 0
        for filename in filenames:
            try:
                self._transfer(filename)
            except ftplib.error_perm as ex:
                #Ex: 550 I can only retrieve regular files
                self.log('"%s" is not a file.\nEx: %s' % (filename, ex))
            except ftplib.all_errors as ex:
                #IOError ex: [Errno 60] Operation timed out, [Errno 50] Network is down, [Errno 32] Broken pipe, [Errno 54] Connection reset by peer
                self.log('Ex: %s' % ex, True, logging.ERROR)
                self.log('...trying to recoonect', True)
                self.connect()
                break
            self.curr_count += 1
            if self.curr_count == self.LISTING_LIMIT and self.delete_files:
                self.log('...get the next %s files' % self.LISTING_LIMIT, True)
                self.get_files()
                # The fresh listing took over; the names left in this one are
                # stale and must not be walked a second time.
                return

    def _transfer(self, filename):
        """Download one remote file and delete it remotely when configured."""
        # Names come from the server: keep only the last path component so a
        # nested or hostile name cannot write outside the target folder.
        local_name = os.path.basename(filename)
        if local_name in ('', '.', '..'):
            self.log('Skipped "%s": not usable as a local file name' % filename)
            return
        size = self.ftp.size(filename)
        # A size of None means the server did not report one; download in
        # that case rather than risk deleting a file that was never fetched.
        if size is None or size > 0:
            target = os.path.join(self.local_folder_path, local_name)
            with open(target, 'wb') as local_file:
                self.ftp.retrbinary('RETR ' + filename, local_file.write)
            self.down_count += 1
            self.log('Downloaded "%s"' % filename)
        if self.delete_files:
            self.ftp.delete(filename)
            self.del_count += 1
            self.log('Deleted "%s"' % filename)

    def log(self, msg, echo_it=False, lvl=logging.DEBUG):
        """Write ``msg`` to the log at level ``lvl`` and optionally print it."""
        logging.log(lvl, msg)
        if echo_it:
            print(msg)

    #This is not used as it's not needed
    def is_file(self, filename):
        """Return True when the server answers the SIZE request for ``filename``."""
        try:
            self.ftp.size(filename)
        except ftplib.error_perm:
            return False
        return True
# ----- Settings: start -----
host = 'ftp.ubuntu.com'
account = ''
passwd = ''
# Remote FTP folder to download from.
ftp_folder = 'ubuntu/project'
# Local target folder, relative to the current working directory. Leave it
# blank or create the folder by hand: the script does not create it.
local_folder = ''
local_folder_path = os.path.join(os.getcwd(), local_folder)
file_match = ''
# When True, a file is deleted from the server after it has been downloaded,
# or straight away if it has zero bytes.
delete_files = False
# ----- Settings: end -----

# Constructing the Leacher connects and processes the remote folder.
leacher = Leacher(
    host,
    account,
    passwd,
    ftp_folder,
    local_folder_path,
    delete_files,
    file_match,
)

# Final tally of the run, written to the log and echoed to the console.
summary = (
    '\n************************\n'
    'Proccessed %s items overall, deleted %s, saved %s'
    '\n************************\n'
) % (leacher.all_count, leacher.del_count, leacher.down_count)
leacher.log(summary, True)

# Uncomment the line below to raise an exception on purpose, so the script
# exits with an error and supervisor.py restarts it.
#[][1]
import sys
import subprocess
import time
# Seconds to wait before a failed child is started again.
RESTART_DELAY = 10


def build_command(argv):
    """Return the child command line: this interpreter plus ``argv[1:]``."""
    return [sys.executable] + list(argv[1:])


def supervise(to_run, restart_delay=RESTART_DELAY):
    """Run ``to_run`` and start it again whenever it exits with an error.

    Args:
        to_run: Command line (list of strings) of the child process.
        restart_delay: Seconds to sleep before restarting a failed child.

    Returns:
        0, once the child has exited with status 0.  A clean exit means the
        job is done, so there is nothing left to supervise; previously the
        loop kept sleeping forever in that case.
    """
    while True:
        proc = subprocess.Popen(to_run)
        try:
            status = proc.wait()
        finally:
            # Do not leave an orphan behind when we are interrupted.
            if proc.poll() is None:
                proc.terminate()
        if status == 0:
            return status
        time.sleep(restart_delay)


def main(argv=None):
    """Supervise the script named on the command line.

    Use like:
        python supervisor.py my_script.py param1 param2

    Returns:
        The process exit status: 0 after the child finished cleanly, 2 when
        no script was given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Without a script the child would be a bare interactive interpreter.
        print('Usage: python supervisor.py my_script.py param1 param2')
        return 2
    keep_awake = None
    if 'darwin' in sys.platform:
        print('Running \'caffeinate\' on MacOSX to prevent the system from sleeping')
        # Alternative invocation: subprocess.Popen(['caffeinate', '-is'])
        keep_awake = subprocess.Popen('caffeinate')
    to_run = build_command(argv)
    print('Running$ %s' % ' '.join(to_run))
    try:
        supervise(to_run)
    finally:
        # caffeinate runs until it is killed, so stop it together with us.
        if keep_awake is not None:
            keep_awake.terminate()
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment