Skip to content

Instantly share code, notes, and snippets.

@madjar
Last active March 5, 2023 15:02
Show Gist options
  • Save madjar/9312452 to your computer and use it in GitHub Desktop.
Save madjar/9312452 to your computer and use it in GitHub Desktop.
An example of a scraper using asyncio and aiohttp
import asyncio
import aiohttp
import bs4
import tqdm
async def get(*args, **kwargs):
    """Perform an HTTP GET with aiohttp and return the response body as text.

    All positional and keyword arguments are forwarded unchanged to
    ``aiohttp.ClientSession.get``.

    Written as a native coroutine: ``@asyncio.coroutine`` was removed in
    Python 3.11, and the old ``yield from aiohttp.request(...)`` /
    ``response.read_and_close()`` calling style is not available in current
    aiohttp releases.
    """
    # The context managers release the connection and close the session even
    # if reading the body raises.
    async with aiohttp.ClientSession() as session:
        async with session.get(*args, **kwargs) as response:
            return await response.text()
async def wait_with_progress(coros):
    """Await every awaitable in *coros* while displaying a tqdm progress bar.

    The awaitables are awaited in completion order (``asyncio.as_completed``).
    Their results are discarded; an exception raised by any of them propagates
    to the caller as soon as that awaitable is reached.

    Args:
        coros: An iterable of awaitables.
    """
    # Materialize once so that len() works for any iterable (a generator has
    # no length) and so the input is only iterated a single time.
    pending = list(coros)
    for finished in tqdm.tqdm(asyncio.as_completed(pending), total=len(pending)):
        await finished
def first_magnet(page):
    """Return the ``href`` of the first magnet-download link found in *page*.

    Args:
        page: HTML markup of a search-result page.

    Returns:
        The ``href`` attribute of the first ``<a>`` element whose ``title`` is
        ``'Download this torrent using magnet'``.

    Raises:
        ValueError: If the page contains no such link.
    """
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed and emits a warning. 'html.parser' ships
    # with the standard library, so no extra dependency is needed.
    soup = bs4.BeautifulSoup(page, 'html.parser')
    link = soup.find('a', title='Download this torrent using magnet')
    if link is None:
        # soup.find() returns None on a miss; fail with a clear message rather
        # than "'NoneType' object is not subscriptable".
        raise ValueError('no magnet link found in page')
    return link['href']
async def print_magnet(query):
    """Search for *query* and print ``'<query>: <magnet link>'``.

    The search page is downloaded with ``get`` while holding the module-level
    semaphore ``sem``, which bounds how many downloads run concurrently.

    Written as a native coroutine: ``@asyncio.coroutine`` was removed in
    Python 3.11 and ``with (yield from sem)`` was removed in Python 3.9.

    Args:
        query: The search term; it is interpolated into the URL as-is.
    """
    url = 'http://thepiratebay.se/search/{}/0/7/0'.format(query)
    # Hold the semaphore only for the network request; parsing and printing
    # do not need to be rate-limited.
    async with sem:
        page = await get(url, compress=True)
    magnet = first_magnet(page)
    print('{}: {}'.format(query, magnet))
distros = ['archlinux', 'ubuntu', 'debian']
# Bound inside main(): on Python < 3.10 a Semaphore attaches to the event loop
# that is current when it is constructed, so it must be created under the loop
# that actually runs the tasks.
sem = None


async def main():
    """Look up the first magnet link for every entry in ``distros`` concurrently."""
    global sem
    sem = asyncio.Semaphore(5)
    # asyncio.wait() rejects bare coroutines since Python 3.11, so wrap each
    # one in a Task first. As before, a failure in one lookup is not re-raised
    # here and does not stop the others.
    tasks = [asyncio.create_task(print_magnet(d)) for d in distros]
    await asyncio.wait(tasks)


if __name__ == '__main__':
    # asyncio.run() creates and closes the event loop; calling
    # asyncio.get_event_loop() with no running loop is deprecated.
    asyncio.run(main())
@Wikidepia
Copy link

import asyncio
import aiohttp
import bs4
import tqdm


async def get(*args, **kwargs):
    """Issue an HTTP GET through a short-lived aiohttp session.

    Every positional and keyword argument is handed to ``session.get``
    untouched. The decoded response body is returned once both the response
    and the session have been released.
    """
    async with aiohttp.ClientSession() as client, \
            client.get(*args, **kwargs) as reply:
        body = await reply.text()
    return body


def first_magnet(page):
    """Return the ``href`` of the first magnet-download link found in *page*.

    Args:
        page: HTML markup of a search-result page.

    Returns:
        The ``href`` attribute of the first ``<a>`` element whose ``title`` is
        ``'Download this torrent using magnet'``.

    Raises:
        ValueError: If the page contains no such link.
    """
    soup = bs4.BeautifulSoup(page, features="lxml")
    link = soup.find('a', title='Download this torrent using magnet')
    if link is None:
        # soup.find() returns None on a miss; fail with a clear message rather
        # than "'NoneType' object is not subscriptable".
        raise ValueError('no magnet link found in page')
    return link['href']


async def print_magnet(query):
    """Fetch the search page for *query* and print ``'<query>: <magnet link>'``.

    Only the download itself runs under the module-level semaphore ``sem``;
    extracting the link and printing happen after it has been released.
    """
    search_url = f'http://thepiratebay.se/search/{query}/0/7/0'
    async with sem:
        html = await get(search_url, compress=True)
    print(f'{query}: {first_magnet(html)}')


distros = ['archlinux', 'ubuntu', 'debian']
# Bound inside main(): on Python < 3.10 a Semaphore attaches to the event loop
# that is current when it is constructed, so it must be created under the loop
# that actually runs the tasks.
sem = None


async def main():
    """Look up the first magnet link for every entry in ``distros`` concurrently."""
    global sem
    sem = asyncio.Semaphore(5)
    # asyncio.wait() rejects bare coroutines since Python 3.11, so wrap each
    # one in a Task first. As before, a failure in one lookup is not re-raised
    # here and does not stop the others.
    tasks = [asyncio.create_task(print_magnet(d)) for d in distros]
    await asyncio.wait(tasks)


if __name__ == '__main__':
    # asyncio.run() creates and closes the event loop; calling
    # asyncio.get_event_loop() with no running loop is deprecated.
    asyncio.run(main())

Tested with Python 3.8, but RIP ThePirateBay.

@bartosz-bear
Copy link

bartosz-bear commented Mar 5, 2023

This gist doesn't work with the latest version of Python and asyncio, as of March 2023.

The @asyncio.coroutine decorator no longer exists in asyncio (it was removed in Python 3.11).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment