Skip to content

Instantly share code, notes, and snippets.

@ChakshuGautam
Last active November 20, 2018 07:32
Show Gist options
  • Save ChakshuGautam/6263b27f09eecfb1e8732d7c3a91ef61 to your computer and use it in GitHub Desktop.
Save ChakshuGautam/6263b27f09eecfb1e8732d7c3a91ef61 to your computer and use it in GitHub Desktop.
Download IMDB files and save them in a directory.
import asyncio
from contextlib import closing
import aiohttp
import aiofiles
async def download_file(session: aiohttp.ClientSession, url: str):
    """Download ``url`` and save it in the current working directory.

    The local file name is the last path segment of ``url``. The response
    body is streamed to disk in fixed-size chunks, so a large file is never
    held in memory all at once.

    Args:
        session: Open aiohttp client session used to issue the GET request.
        url: Address of the file to download.

    Returns:
        The name of the file that was written.

    Raises:
        aiohttp.ClientResponseError: If the server answers with any status
            other than 200.
    """
    filename = url.split('/')[-1]
    chunk_size = 64 * 1024

    async with session.get(url) as response:
        # Raise explicitly instead of using ``assert``: assertions are
        # stripped under ``python -O``, which would let an error page be
        # saved to disk as if it were the requested file.
        if response.status != 200:
            raise aiohttp.ClientResponseError(
                response.request_info,
                response.history,
                status=response.status,
                message='unexpected HTTP status for ' + url,
                headers=response.headers,
            )

        print("Started saving file: ", filename)

        # The context manager closes the file even if a write or a network
        # read fails part-way through.
        async with aiofiles.open(filename, mode='wb') as f:
            async for chunk in response.content.iter_chunked(chunk_size):
                await f.write(chunk)

    return filename
async def download_multiple(session: aiohttp.ClientSession, urls=None):
    """Download several files concurrently and report each as it finishes.

    Args:
        session: Open aiohttp client session shared by all downloads.
        urls: Optional iterable of URLs to fetch. When omitted, the IMDb
            dataset files listed below are downloaded.

    Returns:
        The list of URLs that were requested, in their original order.
    """
    if urls is None:
        urls = [
            'https://datasets.imdbws.com/name.basics.tsv.gz',
            'https://datasets.imdbws.com/title.akas.tsv.gz',
            'https://datasets.imdbws.com/title.basics.tsv.gz',
            'https://datasets.imdbws.com/title.crew.tsv.gz',
            'https://datasets.imdbws.com/title.episode.tsv.gz',
            'https://datasets.imdbws.com/title.principals.tsv.gz',
            'https://datasets.imdbws.com/title.ratings.tsv.gz',
        ]
    else:
        # Materialize once so a one-shot iterable can be both scheduled
        # and returned.
        urls = list(urls)

    download_futures = [download_file(session, url) for url in urls]
    print('Results')
    # as_completed yields in completion order, so the progress lines may
    # not follow the order of ``urls``.
    for download_future in asyncio.as_completed(download_futures):
        result = await download_future
        print('finished saving:', result)
    return urls
async def main():
    """Open one client session, run every download, and print the URL list."""
    async with aiohttp.ClientSession() as http_session:
        requested_urls = await download_multiple(http_session)
        print('finished:', requested_urls)
# Create the event loop explicitly: calling asyncio.get_event_loop() with no
# running loop is deprecated and fails on recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(main())
finally:
    # Release the loop's resources even when a download raises.
    loop.close()
@ChakshuGautam
Copy link
Author

ChakshuGautam commented Nov 20, 2018

Downloads and saves files asynchronously.
You might run into memory problems, though, if you are not careful about the number of files you download at once.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment