Skip to content

Instantly share code, notes, and snippets.

@pawelmhm
Created February 25, 2016 20:36
Show Gist options
  • Save pawelmhm/4b0184e0cb97f2473af0 to your computer and use it in GitHub Desktop.
basic crawler
#!/usr/local/bin/python3.5
import asyncio
from aiohttp import ClientSession
class Crawler(object):
    """Toy crawler: concurrently fetches http://localhost:8080/1 .. /4.

    ``run()`` schedules one fetch task per page number until ``limit`` is
    reached, then spins until every outstanding task has finished and
    finally stops the event loop it was given.
    """

    # Next page number to schedule (class-level defaults, shadowed per instance).
    i = 1
    # Scheduling stops once ``i`` reaches this value (so pages 1..limit-1 are fetched).
    limit = 5
    # URL template; ``{}`` is replaced by the page number.
    url = "http://localhost:8080/{}"

    def __init__(self, loop):
        """Remember the event loop to stop later and the set of live tasks."""
        self.loop = loop
        self.tasks = set()

    def busy(self):
        # True while there are still page numbers left to schedule.
        return self.i < self.limit

    def tasks_remain(self):
        """Report and return whether any fetch task is still outstanding."""
        print("tasks remaining {}".format(len(self.tasks)))
        return len(self.tasks) != 0

    # NOTE: rewritten from the deprecated @asyncio.coroutine / ``yield from``
    # style (removed in Python 3.11) to native ``async``/``await``, which the
    # file's target interpreter (python3.5, per the shebang) already supports.
    async def run(self):
        """Schedule all fetches, wait for them to drain, then stop the loop."""
        # BUGFIX: the original spawned a fresh start() task on every pass of
        # this loop while waiting for completions, but only advanced self.i in
        # a done-callback — scheduling many duplicate fetches of the same URL.
        # self.i now advances at scheduling time (in start()), so each page is
        # fetched exactly once.
        while self.busy():
            await self.start()
            await asyncio.sleep(0)
        print("outside run")
        while self.tasks_remain():
            # Yield to the loop so the fetch tasks can make progress.
            await asyncio.sleep(0)
        self.loop.stop()

    async def start(self):
        """Schedule one fetch task for the current page number."""
        # ensure_future is the supported scheduling API; constructing
        # asyncio.Task directly is discouraged by the asyncio docs.
        task = asyncio.ensure_future(self.fetch(self.url.format(self.i)))
        self.i += 1  # advance at scheduling time so each URL is fetched once
        task.add_done_callback(self.print_response)

        def remove_task(task):
            # Bookkeeping: drop the finished task so tasks_remain() can drain.
            print("about to remove task")
            self.tasks.remove(task)

        task.add_done_callback(remove_task)
        self.tasks.add(task)

    async def fetch(self, url):
        """GET *url* and return the raw response body as bytes."""
        # One session per request is wasteful but preserved from the original.
        # ``async with`` replaces the original plain ``with``: synchronous
        # context-manager use of ClientSession is unsupported in aiohttp 3.
        async with ClientSession() as session:
            print("about to fetch... {}".format(url))
            response = await session.get(url)
            response_body = await response.read()
            return response_body

    def print_response(self, response):
        """Done-callback: print the fetched body (re-raises if the task failed)."""
        print("got response here {}".format(response.result()))
if __name__ == "__main__":
    # Guard the entry point so importing this module has no side effects.
    loop = asyncio.get_event_loop()
    c = Crawler(loop)
    # ensure_future is the supported scheduling API; constructing
    # asyncio.Task directly is discouraged by the asyncio docs.
    asyncio.ensure_future(c.run(), loop=loop)
    # Crawler.run() calls loop.stop() once all fetch tasks have drained.
    loop.run_forever()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment