@KHN190
Created April 2, 2019 09:30
import aiohttp
import asyncio
import logging

from lxml import html
from tqdm import tqdm

USERNAME = ""
PASSWORD = ""
LOGIN_URL = ""


async def parse(url, sess, log):
    async with sess.get(url) as resp:
        if resp.status == 200:
            tree = html.fromstring(await resp.text())
            # do something with the parsed tree
        else:
            log.warning("fail: %s" % url)


async def main():
    logging.basicConfig(
        filename='async_crawler.log', filemode='a', level=logging.INFO)

    payload = {
        "user_name": USERNAME,
        "password": PASSWORD,
    }

    loop = asyncio.get_event_loop()
    # Cap the number of simultaneous connections at 20.
    connector = aiohttp.TCPConnector(limit=20)

    # read_urls() and headers() are helpers assumed to be defined elsewhere.
    request_urls = read_urls()

    async with aiohttp.ClientSession(loop=loop, connector=connector) as sess:
        # Log in once, then reuse the authenticated session for every request.
        async with sess.post(
                LOGIN_URL, data=payload, headers=headers(LOGIN_URL)) as resp:
            assert resp.status == 200
            for url in tqdm(request_urls):
                await parse(url, sess, log=logging)


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
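
Note that the loop in main() awaits each parse() call before issuing the next request, so the crawl is effectively sequential even though the session is asynchronous. Below is a minimal, self-contained sketch of the same fetching step driven by asyncio.gather with a Semaphore to bound concurrency; the example URLs and the limit of 20 are placeholders, not values taken from this gist.

import asyncio
import aiohttp


async def fetch(url, sess, sem):
    # The semaphore caps how many requests are in flight at once,
    # playing the same role as the TCPConnector limit above.
    async with sem:
        async with sess.get(url) as resp:
            return url, resp.status


async def crawl(urls, limit=20):
    sem = asyncio.Semaphore(limit)
    async with aiohttp.ClientSession() as sess:
        # gather schedules all fetches at once; the semaphore throttles them.
        return await asyncio.gather(*(fetch(u, sess, sem) for u in urls))


if __name__ == '__main__':
    placeholder_urls = ['https://example.com'] * 5  # placeholder URLs
    for url, status in asyncio.run(crawl(placeholder_urls)):
        print(status, url)

With gather, the per-URL coroutines run concurrently on the event loop, which is usually where the speedup of an async crawler comes from.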