Skip to content

Instantly share code, notes, and snippets.

@laranea
Forked from peeterskris/ndw-crawler
Last active August 29, 2015 14:11
Show Gist options
  • Save laranea/32a722c0e87cc7b627ed to your computer and use it in GitHub Desktop.
Save laranea/32a722c0e87cc7b627ed to your computer and use it in GitHub Desktop.
from ftplib import FTP
import ftplib
from datetime import datetime
import os
import pickle
# Address of the FTP server to crawl (handed to ftplib.FTP by crawl()).
FTP_SERVER = '83.247.110.3'
# Remote directory whose entries are listed and downloaded.
FTP_FOLDER = '/'
# Local directory that receives the downloaded files and the files.pickle
# state file. Note the trailing slash: file names go directly under it.
DESTINATION = '/volume1/Cluster/data/nl-traffic/'
# Alternative, relative output directory (currently disabled).
#DESTINATION = 'out/'
def _timestamped_name(remote_name, modified):
    """Return the local file name used to store one version of a remote file.

    The modification time is inserted after the part of the name that
    precedes the first dot, e.g. ``a.xml.gz`` -> ``a-2015-01-02_03-04-05.xml.gz``.
    Names without a dot simply get the timestamp appended.
    """
    stamp = modified.strftime('%Y-%m-%d_%H-%M-%S')
    # basename() keeps a server-supplied name such as "/x.xml" or "../x.xml"
    # from steering the download outside the destination directory.
    base, dot, extension = os.path.basename(remote_name).partition('.')
    return "%s-%s%s%s" % (base, stamp, dot, extension)


def crawl(server, folder, destination):
    """Download every new or updated file from an FTP folder.

    Logs in anonymously, lists ``folder`` with NLST and asks the server for
    each entry's modification time (MDTM). An entry is downloaded when it
    has not been seen before or when its modification time is newer than
    the one remembered from the previous run. Each download is stored in
    ``destination`` under a name that embeds the modification time, so
    older versions are kept. The remembered times live in ``files.pickle``
    inside ``destination``.

    Args:
        server: Host name or address of the FTP server.
        folder: Remote directory to crawl.
        destination: Local directory for the downloads and the state file.

    Raises:
        ftplib.error_perm: If listing the folder is refused for a reason
            other than "550 No files found".
        Exception: Errors from connecting, logging in, changing directory,
            or reading/writing the state file are not handled here and
            propagate to the caller.
    """
    # The state file belongs to the destination that was passed in, not to
    # the module-level default.
    state_path = os.path.join(destination, "files.pickle")
    files = {}
    if os.path.isfile(state_path):
        # NOTE: pickle can execute arbitrary code while loading; this is only
        # safe as long as the destination directory is trusted.
        with open(state_path, "rb") as state_file:
            files = pickle.load(state_file)

    ftp = FTP(server)
    try:
        ftp.login()
        ftp.cwd(folder)
        try:
            flist = ftp.nlst()
        except ftplib.error_perm as resp:
            if str(resp) == "550 No files found":
                print("No files in this directory")
                flist = []  # nothing to iterate over, but still a valid run
            else:
                raise

        for f in flist:
            try:
                reply = ftp.sendcmd('MDTM ' + f)
            except ftplib.error_perm as resp:
                # Presumably a directory or another entry without a
                # modification time; skip it instead of aborting the run.
                print('skip %s: %s' % (f, resp))
                continue
            # The reply looks like "213 YYYYMMDDHHMMSS"; only the first 14
            # digits are parsed so an optional fractional part is ignored.
            modifiedTime = datetime.strptime(reply[4:].strip()[:14],
                                             "%Y%m%d%H%M%S")

            previous = files.get(f)
            if previous is not None and previous >= modifiedTime:
                print('ignore %s: %s' % (f, modifiedTime))
                files[f] = modifiedTime
                continue

            target = os.path.join(destination,
                                  _timestamped_name(f, modifiedTime))
            print('downloading %s: %s' % (f, modifiedTime))
            try:
                with open(target, 'wb') as out:
                    ftp.retrbinary("RETR " + f, out.write)
            except ftplib.all_errors as err:
                print("Error downloading %s: %s" % (f, err))
                # Drop the incomplete file and leave the entry unrecorded so
                # the next run tries again.
                try:
                    os.remove(target)
                except OSError:
                    pass
                continue
            files[f] = modifiedTime
    finally:
        try:
            ftp.quit()
        except ftplib.all_errors:
            ftp.close()

    with open(state_path, "wb") as state_file:
        pickle.dump(files, state_file)
# Run one crawl pass with the settings configured above. This call sits at
# module level, so it also runs if the file is imported.
crawl(FTP_SERVER, FTP_FOLDER, DESTINATION)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment