Skip to content

Instantly share code, notes, and snippets.

@TheReal1604
Forked from flibbertigibbet/recurse_ftp.py
Created February 2, 2018 14:52
Show Gist options
  • Save TheReal1604/bcb0b313f66f6ce78a7abea811139dae to your computer and use it in GitHub Desktop.
Save TheReal1604/bcb0b313f66f6ce78a7abea811139dae to your computer and use it in GitHub Desktop.
Recursively fetch files from an FTP server directory. Here, it's downloading all the zip files found in or beneath the parent directory.
#!/usr/bin/env python
from ftplib import FTP
from time import sleep
import os
my_dirs = [] # global
my_files = [] # global
curdir = '' # global
def get_dirs(ln):
global my_dirs
global my_files
cols = ln.split(' ')
objname = cols[len(cols)-1] # file or directory name
if ln.startswith('d'):
my_dirs.append(objname)
else:
if objname.endswith('.zip'):
my_files.append(os.path.join(curdir, objname)) # full path
def check_dir(adir):
global my_dirs
global my_files # let it accrue, then fetch them all later
global curdir
my_dirs = []
gotdirs = [] # local
curdir = ftp.pwd()
print("going to change to directory " + adir + " from " + curdir)
ftp.cwd(adir)
curdir = ftp.pwd()
print("now in directory: " + curdir)
ftp.retrlines('LIST', get_dirs)
gotdirs = my_dirs
print("found in " + adir + " directories:")
print(gotdirs)
print("Total files found so far: " + str(len(my_files)) + ".")
sleep(1)
for subdir in gotdirs:
my_dirs = []
check_dir(subdir) # recurse
ftp.cwd('..') # back up a directory when done here
try:
ftp = FTP('ftp2.census.gov')
ftp.login()
check_dir('/geo/tiger/GENZ2012') # directory to start in
ftp.cwd('/.') # change to root directory for downloading
for f in my_files:
print('getting ' + f)
file_name = f.replace('/', '_') # use path as filename prefix, with underscores
ftp.retrbinary('RETR ' + f, open(file_name, 'wb').write)
sleep(1)
except:
print('oh dear.')
ftp.quit()
ftp.quit()
print('all done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment