Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
#!/usr/bin/env python
# encoding: utf-8
# @author: xl7dev
from aiohttp import ClientSession
import asyncio
import optparse
from pymongo import MongoClient
from lxml import etree
# Module-level MongoDB handle for the 'test' database.
# NOTE(review): the host string was blank in the original source, and an empty
# host is not a usable server address. 'localhost' (pymongo's default host) is
# assumed here -- point this at the real MongoDB server before running.
client = MongoClient('localhost', 27017)
db = client['test']
# Usage: python3 <this_script>.py -f urls.txt
async def fetch(sem, url):
    """Request *url* with HTTP GET and collect status, headers and page title.

    Args:
        sem: Async context manager (an ``asyncio.Semaphore`` in this file)
            held for the duration of the request to bound concurrency.
        url: URL to request.

    Returns:
        A dict with the keys ``url``, ``status``, ``headers`` and ``title``.
        ``title`` is an empty string when the page has no ``<title>`` text.
        Returns ``None`` when the request or parsing fails; the URL and the
        error are printed in that case.
    """
    async with sem:
        try:
            async with ClientSession() as session:
                async with session.get(url, timeout=20) as response:
                    status = response.status
                    headers = response.headers
                    resp = await response.text()
                    html = etree.HTML(resp)
                    # A page without a <title> (or an unparsable body) should
                    # still yield a record instead of failing on [0].
                    titles = html.xpath('//title/text()') if html is not None else []
                    title = titles[0].strip() if titles else ""
                    return {"url": url, "status": status, "headers": headers, "title": title}
        except Exception as e:
            # Best-effort scan: one bad target must not abort the whole batch.
            print(url, e)
            return None
async def run(urls, concurrency=500):
    """Schedule one ``fetch`` task per URL and wait for all of them.

    Args:
        urls: Iterable of URLs to request.
        concurrency: Initial value of the semaphore shared by all tasks,
            i.e. the maximum number of fetches in flight at once.
            Defaults to 500.

    Returns:
        A list holding the result of each ``fetch`` call, in the same order
        as *urls* (an empty list when *urls* is empty).
    """
    # One semaphore shared by every task bounds the number of open requests.
    sem = asyncio.Semaphore(concurrency)
    tasks = [asyncio.create_task(fetch(sem, url)) for url in urls]
    return await asyncio.gather(*tasks)
def _load_urls(path):
    """Read one target per line from *path*, prefixing http:// when no scheme is given."""
    urls = []
    with open(path) as handle:
        for line in handle:
            target = line.strip()
            if not target:
                # A blank line would otherwise turn into the bogus URL "http://".
                continue
            if not target.startswith(("http://", "https://")):
                target = "http://" + target
            urls.append(target)
    return urls


# Script entry point: read targets from the file given with -f/--filename and
# scan them all. Prints the option help when no filename is supplied.
if __name__ == "__main__":
    parser = optparse.OptionParser()
    parser.add_option("-f", "--filename", dest="filename", help="Target filename")
    options, _ = parser.parse_args()
    if options.filename:
        urls = _load_urls(options.filename)
        # Actually drive the coroutine; merely obtaining an event loop runs nothing.
        asyncio.run(run(urls))
    else:
        parser.print_help()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment