Created
July 13, 2018 14:47
-
-
Save DimasInchidi/aa1e455ae975d3d40dd5e898ca89568c to your computer and use it in GitHub Desktop.
simple python web scraper example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import asyncio | |
import re | |
import time | |
import aiohttp # pip install aiohttp | |
from bs4 import BeautifulSoup # pip install beautifulsoup4 | |
async def get_response_text(url):
    """Fetch *url* over HTTP and return the decoded response body."""
    # A single multi-item ``async with`` closes both the session and the
    # response when the body has been read.
    async with aiohttp.ClientSession() as session, session.get(url) as resp:
        return await resp.text()
async def crawl(i):
    """Scrape page *i* of passiton.com's inspirational quotes and print each one.

    Each quote is stored in the ``alt`` attribute of the image inside a
    ``div.portfolio-image`` element; a trailing ``#<Author:...>`` marker is
    stripped before printing.
    """
    url = f"https://www.passiton.com/inspirational-quotes?page={i}"
    r = await get_response_text(url)
    soup = BeautifulSoup(r, 'html.parser')
    results = soup.find_all('div', {'class': 'portfolio-image'})
    for result in results:
        quotes = result('a')[0]('img')[0]['alt']
        # BUG FIX: str.replace returns a new string; the original call
        # discarded its result, so no normalization ever happened.
        # NOTE(review): the final replace collapses double spaces — the
        # original read ``replace(' ', ' ')`` (a no-op), presumably a
        # copy/paste artifact of a double space; confirm intent.
        quotes = quotes.replace('\n\n', '\n').replace('\n', ' ').replace('  ', ' ')
        quotes = re.sub(' #<Author:.+>', '', quotes)
        print(quotes, end='\n\n')
def start(chunk):
    """Crawl every page number in *chunk* concurrently.

    Uses ``asyncio.run`` so the event loop is created and torn down per
    call.  The original ``get_event_loop()`` / ``run_until_complete`` /
    ``close()`` pattern is deprecated, permanently closed the default
    loop (so ``start`` could never be called twice), and ``asyncio.wait``
    raises ``ValueError`` when *chunk* is empty — ``gather`` does not.
    """
    async def _run_all():
        await asyncio.gather(*(crawl(i) for i in chunk))

    asyncio.run(_run_all())
if __name__ == '__main__':
    max_page = 50
    # BUG FIX: range(1, max_page) yields only max_page - 1 pages (1..49)
    # while the summary message claimed max_page; crawl exactly max_page
    # pages instead.
    iterable = range(1, max_page + 1)
    start_time = time.time()
    start(iterable)
    end_time = time.time()
    print(
        f"Total crawl time of {max_page} page{'' if max_page < 2 else 's'} in {end_time - start_time} seconds"
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment