Skip to content

Instantly share code, notes, and snippets.

@lanfon72
Last active August 4, 2017 09:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lanfon72/163389fdb02244a2c94661cb8841207d to your computer and use it in GitHub Desktop.
Save lanfon72/163389fdb02244a2c94661cb8841207d to your computer and use it in GitHub Desktop.
Save files from the Garner FTP server (garner.ucsd.edu).
import os
from ftplib import FTP
import asyncio
import aiohttp
from aiofiles import open as aopen
def get_connect(url):
    """Open an anonymous FTP session and enter the folder named by `url`.

    Args:
        url: Location written as ``[scheme://]host[/folder]``. Anything up to
            and including the first ``//`` is ignored.

    Returns:
        A logged-in ``ftplib.FTP`` object whose working directory is the
        requested folder (or the login directory when no folder is given).
        The caller is responsible for closing it.

    Raises:
        Any exception raised by ``ftplib`` while connecting, logging in or
        changing directory. The connection is closed before re-raising.
    """
    # partition() accepts a bare host name; a strict two-way unpack of
    # split('/', 1) raises ValueError when the URL carries no folder part.
    root, _, foldr = url.split('//', 1)[-1].partition('/')
    ftp = FTP(root)
    try:
        ftp.login()
        if foldr:
            ftp.cwd(foldr)
    except Exception:
        # Do not leak the socket when login or cwd is refused.
        ftp.close()
        raise
    return ftp
async def save_file(path, url, semaphore):
    """Download `url` over HTTP and write the response body to `path`.

    Args:
        path: Local file name to write.
        url: HTTP URL to fetch.
        semaphore: Async context manager (e.g. ``asyncio.Semaphore``) held
            while the request is made and the body is read, so the caller
            can cap the number of simultaneous downloads.

    Returns:
        ``None`` when the file was saved, otherwise `path`, so the caller can
        tell which download failed. Errors are printed, never raised.
    """
    opened = False
    try:
        async with semaphore, aiohttp.request('GET', url) as r:
            # A 4xx/5xx answer must count as a failure; otherwise the error
            # page would be written to disk and reported as "saved".
            r.raise_for_status()
            data = await r.read()
        async with aopen(path, 'wb') as f:
            opened = True
            await f.write(data)
    except Exception as e:
        print("get error on", path, "due to", repr(e))
        if opened:
            # Best-effort cleanup: a truncated file left behind would look
            # like a finished download to anything that lists the directory.
            try:
                os.remove(path)
            except OSError:
                pass
        return path
    else:
        print(path, "saved.")
        return None
async def main(files, url, max_retries=None):
    """Download every name in `files` from `url`, retrying the failures.

    Args:
        files: Iterable of file names; each is fetched from ``url/name`` and
            saved under the same name via ``save_file``.
        url: Base URL that the file names are appended to.
        max_retries: Maximum number of retry rounds after the first pass.
            ``None`` (the default) keeps retrying until nothing fails.

    Returns:
        None.
    """
    # At most 256 requests are in flight at any time.
    concurrent = asyncio.Semaphore(256)
    failed = list(files)
    rounds = 0
    while failed:
        if rounds:
            if max_retries is not None and rounds > max_retries:
                print("giving up on:", len(failed))
                break
            print("fail to fetched:", len(failed))
        jobs = [save_file(f, "%s/%s" % (url, f), concurrent) for f in failed]
        # gather() is used instead of asyncio.wait(): wait() rejects bare
        # coroutines on Python 3.11+ and raises ValueError when given an
        # empty collection.
        results = await asyncio.gather(*jobs)
        # save_file returns the path on failure and None on success.
        failed = [p for p in results if p]
        rounds += 1
if __name__ == '__main__':
    url = "garner.ucsd.edu/pub/rinex/2003/303"
    conn = get_connect(url)
    # The FTP context manager sends QUIT on exit, so the control connection
    # is released even if the directory listing raises.
    with conn:
        # Only fetch names that are not already in the current directory.
        files = set(conn.nlst()).difference(os.listdir())
    if files:
        # NOTE(review): the login is hard-coded into the download URL here.
        url = r'http://anonymous:jason%40ucsd.edu@' + url
        # Create the loop explicitly: the implicit asyncio.get_event_loop()
        # is deprecated outside a running loop, and the loop should be closed.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(main(files, url))
        finally:
            loop.close()
from ftplib import FTP
def get_connect(url):
    """Open an anonymous FTP session and enter the folder named by `url`.

    Args:
        url: Location written as ``[scheme://]host[/folder]``. Anything up to
            and including the first ``//`` is ignored.

    Returns:
        A logged-in ``ftplib.FTP`` object whose working directory is the
        requested folder (or the login directory when no folder is given).
        The caller is responsible for closing it.

    Raises:
        Any exception raised by ``ftplib`` while connecting, logging in or
        changing directory. The connection is closed before re-raising.
    """
    # partition() accepts a bare host name; a strict two-way unpack of
    # split('/', 1) raises ValueError when the URL carries no folder part.
    root, _, foldr = url.split('//', 1)[-1].partition('/')
    ftp = FTP(root)
    try:
        ftp.login()
        if foldr:
            ftp.cwd(foldr)
    except Exception:
        # Do not leak the socket when login or cwd is refused.
        ftp.close()
        raise
    return ftp
def download_file(ftp, filename):
    """Retrieve `filename` over `ftp` and store it under the same local name.

    Args:
        ftp: Connected FTP session; ``retrbinary`` is used for the transfer,
            and ``quit``/``close`` are used to shut it down on failure.
        filename: Name of the remote file, also used as the local path.

    Returns:
        ``(ftp, filename)`` when the transfer succeeded. ``(None, None)``
        when it failed; in that case the error has been printed, the session
        has been shut down and any partial local file has been removed.
    """
    import os  # local import so the block does not rely on file-level imports

    opened = False
    try:
        with open(filename, 'wb') as f:
            opened = True
            ftp.retrbinary("RETR %s" % filename, f.write)
    except Exception as e:
        print(e)
        # quit() talks to the server and can itself fail on a broken
        # connection; fall back to close() so the error cannot escape.
        try:
            ftp.quit()
        except Exception:
            ftp.close()
        if opened:
            # Best-effort cleanup so a truncated file is not mistaken for a
            # complete download.
            try:
                os.remove(filename)
            except OSError:
                pass
        return None, None
    else:
        return ftp, filename
def main(files, url, max_retries=3):
    """Download every name in `files` from the FTP folder at `url`.

    Args:
        files: Iterable of remote file names; duplicates are ignored.
        url: FTP location passed to ``get_connect``.
        max_retries: How many times a failed file is queued again before it
            is given up on.

    Returns:
        None.
    """
    pending = set(files)
    failures = {}
    ftp = None
    try:
        while pending:
            # Connect lazily: nothing is opened for an empty list, and a
            # fresh session is made after download_file drops a broken one.
            if ftp is None:
                ftp = get_connect(url)
            # Pop inside the loop so the last remaining file is downloaded too.
            fname = pending.pop()
            ftp, _ = download_file(ftp, fname)
            if ftp is not None:
                continue
            # download_file returns (None, None) on failure, so the name has
            # to be remembered here in order to retry it.
            failures[fname] = failures.get(fname, 0) + 1
            if failures[fname] <= max_retries:
                pending.add(fname)
            else:
                print("giving up on", fname, "after", failures[fname], "failures")
    finally:
        if ftp is not None:
            # Close the last session politely, forcibly if QUIT fails.
            try:
                ftp.quit()
            except Exception:
                ftp.close()
if __name__ == '__main__':
    url = "garner.ucsd.edu/pub/rinex/2003/302"
    conn = get_connect(url)
    # The FTP context manager sends QUIT on exit, so the listing connection
    # is released even if nlst() raises.
    with conn:
        files = conn.nlst()
    main(files, url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment