Instantly share code, notes, and snippets.

@HanaanY /giraffes.py Secret
Last active Jul 24, 2018

Embed
What would you like to do?
let's grab images
import os.path
import csv
import asks
import trio
import sys
from IPython.display import Image
import ast
# Root folder of the Snapshot Serengeti data (hard drive on the remote machine).
_DEFAULT_DIR = '/media/naan/WinStorage/SnapshotSerengeti/'
# Base URL that relative image paths are appended to.
_DEFAULT_REPO = "https://snapshotserengeti.s3.msi.umn.edu/"
# Downloaded giraffe images go into a subfolder of the data root.
_DEFAULT_DOWNLOAD_DIR = os.path.join(_DEFAULT_DIR, 'Giraffes/')
# Tell asks to run on top of trio; done once at module load.
asks.init('trio')
def test():
    """Smoke test: download one known image into the default download folder.

    ``download_file`` takes a session as its second argument, so a session
    is created inside the trio run and passed along explicitly.
    """
    test_url = _DEFAULT_REPO + 'S1/B05/B05_R2/S1_B05_R2_PICT0044.JPG'

    async def fetch_one():
        # Build the session inside the running event loop, the same way
        # download_giraffes does.
        await download_file(test_url, asks.Session())

    trio.run(fetch_one)
def log(*args, **kwargs):
    """Print like the built-in ``print``, but send the output to stderr.

    Positional and keyword arguments are forwarded to ``print`` unchanged.
    ``file`` is already supplied here, so callers must not pass it.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
async def download_file(url, session, directory=_DEFAULT_DOWNLOAD_DIR):
    """Download ``url`` into ``directory`` unless the file is already there.

    Args:
        url: Full URL of the file; the part after the last ``/`` becomes the
            local file name.
        session: Object whose awaitable ``get(url, retries=...)`` returns a
            response exposing ``status_code`` and ``body``.
        directory: Folder the file is written to.

    Returns:
        None. A non-200 response is reported through ``log`` and nothing is
        written for it.
    """
    tail = url.split('/')[-1]
    target = os.path.join(directory, tail)
    if os.path.exists(target):
        # Already fetched on an earlier run.
        return

    response = await session.get(url, retries=10)
    print("requested {}".format(tail))
    if response.status_code != 200:
        log("response error when downloading file from {} to {}".format(url, target))
        return

    # Write under a temporary name and rename once complete. An interrupted
    # write must not leave a truncated file at ``target``, because the
    # existence check above would then skip it on every later run.
    partial = target + '.part'
    async with await trio.open_file(partial, 'wb') as out_file:
        await out_file.write(response.body)
    os.replace(partial, target)
async def download_giraffes(giraffe_list=_DEFAULT_DIR + 'giraffes.csv', repo=_DEFAULT_REPO):
    """Download every image listed in a CSV file, one task per image.

    Args:
        giraffe_list: Path to the CSV file. The fourth column (index 3) of
            each row is taken as the image path relative to ``repo``.
        repo: Base URL the relative image paths are appended to.
    """
    session = asks.Session(connections=70)

    # newline='' is the csv module's recommended way to open its input file.
    with open(giraffe_list, 'r', newline='') as csv_file:
        # Blank or short rows have no fourth column; skip them rather than
        # failing the whole run with IndexError.
        urls = [repo + row[3] for row in csv.reader(csv_file) if len(row) > 3]

    # The nursery block only exits once every started download has finished.
    async with trio.open_nursery() as n:
        for url in urls:
            n.start_soon(download_file, url, session)
    print("All done.")
# Entry point: starts the full download as soon as this module is loaded.
trio.run(download_giraffes)
# Uncomment to run the single-image test() helper.
#test()
@theelous3

This comment has been minimized.

theelous3 commented Jul 23, 2018

import os.path 
import csv
import asks
import trio
import sys
from IPython.display import Image
import ast

# HDD on remote machine
_DEFAULT_DIR = '/media/naan/WinStorage/SnapshotSerengeti/'
# Giraffe images are stored in a subfolder of the data root.
_DEFAULT_DOWNLOAD_DIR = _DEFAULT_DIR + 'Giraffes/'
# Base URL that relative image paths are appended to.
_DEFAULT_REPO = "https://snapshotserengeti.s3.msi.umn.edu/"

# Tell asks to run on top of trio; done once at module load.
asks.init('trio')


async def download_file(url, directory=_DEFAULT_DOWNLOAD_DIR, repo=_DEFAULT_REPO):
    """Fetch ``url`` and store the response body in ``directory``.

    The local file name is whatever follows the last ``/`` in ``url``.
    Any response other than 200 is reported via ``log`` and nothing is
    written. ``repo`` is accepted but not used by this function.
    """
    filename = url.rsplit('/', 1)[-1]
    destination = os.path.join(directory, filename)
    reply = await asks.get(url)
    if reply.status_code == 200:
        async with await trio.open_file(destination, 'wb') as sink:
            await sink.write(reply.body)
    else:
        log("response error when downloading file from {} to {}".format(url, destination))

async def download_giraffes(giraffe_list=_DEFAULT_DIR + 'giraffes.csv', repo=_DEFAULT_REPO):
    """Download every image listed in a CSV file, one task per image.

    Args:
        giraffe_list: Path to the CSV file, one image per row.
        repo: Base URL the relative image paths are appended to.
    """
    # newline='' is the csv module's recommended way to open its input file.
    with open(giraffe_list, 'r', newline='') as csv_file:
        # Column index 3 is where the original script reads the image path
        # from; confirm against the CSV layout. Rows too short to have that
        # column (e.g. blank lines) are skipped.
        urls = [repo + row[3] for row in csv.reader(csv_file) if len(row) > 3]

    # The nursery block only exits once every started download has finished.
    async with trio.open_nursery() as n:
        for url in urls:
            n.start_soon(download_file, url)

    print("All done.")


# Entry point: starts the full download as soon as this module is loaded.
trio.run(download_giraffes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment