Skip to content

Instantly share code, notes, and snippets.

@yashrsharma44
Created July 31, 2018 14:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save yashrsharma44/4b82ef91c5512a0d711d9855e3e19e81 to your computer and use it in GitHub Desktop.
Sample spider for running the new asyncio support in scrapy
import scrapy
from scrapy.Fetch import Fetch
import asyncio
import aiohttp
class QuotesSpider(scrapy.Spider):
    """Sample spider exercising the experimental asyncio support in Scrapy.

    ``start_requests`` and the callbacks are ``async def``: ``parse`` awaits an
    aiohttp request and Scrapy's experimental ``Fetch`` coroutine directly,
    instead of yielding everything back to the scheduler.
    """

    name = "quotes"

    async def start_requests(self):
        # Two pages of the quotes demo site; each response is handled by parse().
        urls = [
            'http://quotes.toscrape.com/page/1/',
            'http://quotes.toscrape.com/page/2/'
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    async def parse(self, response):
        # Follow-up links: the last two @href values on the page.
        # Extract once instead of running the identical XPath query twice.
        hrefs = response.xpath('//@href').extract()
        links = [hrefs[-1], hrefs[-2]]
        print("Started the aiohttp module!!")
        # NOTE(review): verify_ssl= is deprecated in aiohttp 3.x; the supported
        # spelling is TCPConnector(ssl=False) — equivalent behavior.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            print("Inside the aiohttp Client Session!!")
            # NOTE(review): self.fetch is not defined on this spider —
            # presumably a helper from the original experiment; confirm.
            html = await self.fetch(session, 'https://python-forum.io/Thread-Exploring-async-await-without-knowing-how-they-work-ahead-of-time?pid=17292')
            print(html)
        print("Completed the aiohttp!!")
        spider = response.spider  # One has to get spider and crawler with response, in order to use Fetch. Will work on updating this!
        crawler = response.crawler
        for link in links:
            res = await Fetch(url=link, crawler=crawler, spider=spider)  # You can use yield scrapy.Request(...), for using a callback
            print("Before the asyncio.sleep!!")
            await asyncio.sleep(5)
            print("___RESPONSE___and link {!r}__________________________________________________________{!r}".format(link,res))
        print("---------------------------END OF PARSE------------------------------------------------")

    async def parse2(self, response):
        # Persist the raw response body to quotes-<page>.html for inspection.
        page = response.url.split("/")[-2]
        print("/////////////////////-----------IN PARSE 2----------------------------//////////////////////")
        filename = 'quotes-%s.html' % page
        with open(filename, 'wb') as f:
            f.write(response.body)
        self.log('Saved file %s' % filename)
        print("----END OF PARSE2 ------------")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment