@cosmicexplorer
Created September 25, 2016 22:39
Example of fetching multiple webpages at once using asynchronous requests via grequests
# need to 'pip install grequests'
# import grequests before other modules so gevent's monkey-patching
# happens before anything else touches the socket/ssl machinery
import grequests

from html.parser import HTMLParser


class FetchTitleTag(HTMLParser):
    """Prints the contents of any <title> tag it encounters."""

    def __init__(self):
        HTMLParser.__init__(self)
        self.inTitle = False

    def handle_starttag(self, tag, attrs):
        if tag == 'title':
            self.inTitle = True

    def handle_endtag(self, tag):
        if tag == 'title':
            self.inTitle = False

    def handle_data(self, data):
        if self.inTitle:
            print('title: "%s"' % data.strip())
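
# quick standalone check of the parser above (illustrative only, not
# part of the original gist):
#   FetchTitleTag().feed('<html><title>Hello</title></html>')
#   # -> prints: title: "Hello"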
urls = [
    'http://google.com',
    'http://github.com',
]
# version using grequests (https://github.com/kennethreitz/grequests),
# which has mostly the same api as requests
# (http://docs.python-requests.org/en/master/)
requests_unsent = (grequests.get(u) for u in urls)
# like grequests.map, but returns a generator that yields responses as
# they complete (order is not guaranteed to match `urls`)
requests_iterable = grequests.imap(requests_unsent)

parser = FetchTitleTag()
# the loop body runs synchronously as each response arrives -- could
# also hand responses off to a queue instead
for response in requests_iterable:
    parser.feed(response.text)
    # prints the text of the response (just an html page)
    # print(response.text)
    # prints the status code of the response (should be 200)
    print(response.status_code)
# can also do this with asyncio/aiohttp, but it's more annoying to set
# up -- see the commented-out sketch below
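
# a minimal sketch of that aiohttp alternative, assuming
# 'pip install aiohttp' and python 3.7+ (fetch_title and main are
# illustrative names, not library apis):
#
# import asyncio
# import aiohttp
#
# async def fetch_title(session, url):
#     async with session.get(url) as response:
#         parser = FetchTitleTag()
#         parser.feed(await response.text())
#         print(response.status)
#
# async def main():
#     async with aiohttp.ClientSession() as session:
#         await asyncio.gather(*(fetch_title(session, u) for u in urls))
#
# asyncio.run(main())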