# u1735067/scrapy-icinga-exchange-plugins.py

## scrapy-icinga-exchange-plugins.py
import scrapy, urllib.parse, re

# Python36\Scripts\scrapy.exe runspider .\icinga-exchange-plugins.py -o plugins.json
# jq ". |= sort_by(.views) | reverse | .[0:30]" plugins.json > plugins-top30views.json

# Encoding intended for exported feeds such as `-o plugins.json`.
# NOTE(review): Scrapy takes settings from a project settings module, from
# `-s NAME=value` on the command line, or from a spider's `custom_settings`;
# a module-level name in a file run via `runspider` is presumably not read
# as a setting -- confirm against the Scrapy settings documentation.
FEED_EXPORT_ENCODING = 'utf-8'

class IcingaExchangeSpider(scrapy.Spider):
	"""Crawl the "Plugins" category search results on exchange.icinga.com.

	Every `.entry-content` element of a result page is turned into a dict
	with the keys `link`, `title`, `author`, `description`, `views`,
	`downloads` and `last_update`. The `.next-page` link is followed until a
	page no longer has one.
	"""

	name = 'scrapy-icinga-exchange-plugins'
	allowed_domains = ['exchange.icinga.com']
	# Declared as a per-spider setting so that the UTF-8 feed encoding is
	# actually applied when the file is run with `scrapy runspider`.
	custom_settings = {'FEED_EXPORT_ENCODING': 'utf-8'}

	def start_requests(self):
		"""Yield the request for the first page of the plugin search."""
		yield scrapy.Request('https://exchange.icinga.com/search?q=category%3A%22Plugins%22',
			callback=self.parse)

	@staticmethod
	def _stripped(value):
		"""Return `value` without surrounding whitespace, or None if it is None.

		Selector `.get()` returns None when nothing matched; passing that
		through keeps one incomplete entry from aborting the whole page.
		"""
		if value is None:
			return None
		return value.strip()

	@staticmethod
	def _counter(meta, icon_class):
		"""Return the integer displayed next to an icon inside `meta`.

		`meta` is the selector of the entry's `.meta` element and
		`icon_class` a substring of the icon's class attribute (for example
		`icon-eye`). The last text node of the `span` wrapping that icon is
		parsed as an integer. None is returned when the span is missing or
		its text is not a plain integer.
		"""
		raw = meta.xpath('span[i[contains(@class, "%s")]]/text()[last()]' % icon_class).get()
		if raw is None:
			return None
		try:
			return int(raw.strip())
		except ValueError:
			return None

	def parse(self, response):
		"""Yield one item per listed plugin, then a request for the next page."""
		for content in response.css('.entry-content'):
			meta = content.css('.meta')
			yield {
				'link': self._stripped(content.css('h4 ::attr(href)').get()),
				'title': self._stripped(content.css('h4 ::text').get()),
				'author': self._stripped(content.css('.author a ::text').get()),
				'description': content.css('.description ::text').get(),
				'views': self._counter(meta, 'icon-eye'),
				'downloads': self._counter(meta, 'icon-cloud-download'),
				'last_update': content.css('.meta ::attr(datetime)').get(),
			}
		next_page = response.css('.next-page ::attr(href)').get()
		if next_page:
			yield response.follow(next_page, self.parse)
	import scrapy, urllib.parse, re

	# Python36\Scripts\scrapy.exe runspider .\icinga-exchange-plugins.py -o plugins.json
	# jq ". |= sort_by(.views) | reverse | .[0:30]" plugins.json > plugins-top30views.json

	# Encoding intended for exported feeds such as `-o plugins.json`.
	# NOTE(review): a plain name like this is presumably not read by Scrapy as
	# a setting; spiders declare settings through `custom_settings` -- confirm.
	FEED_EXPORT_ENCODING = 'utf-8'

	class IcingaExchangeSpider(scrapy.Spider):
		"""Crawl the "Plugins" category search results on exchange.icinga.com.

		Every `.entry-content` element of a result page is turned into a dict
		with the keys `link`, `title`, `author`, `description`, `views`,
		`downloads` and `last_update`. The `.next-page` link is followed until
		a page no longer has one.
		"""

		name = 'scrapy-icinga-exchange-plugins'
		allowed_domains = ['exchange.icinga.com']
		# Declared as a per-spider setting so that the UTF-8 feed encoding is
		# actually applied when the file is run with `scrapy runspider`.
		custom_settings = {'FEED_EXPORT_ENCODING': 'utf-8'}

		def start_requests(self):
			"""Yield the request for the first page of the plugin search."""
			yield scrapy.Request('https://exchange.icinga.com/search?q=category%3A%22Plugins%22',
				callback=self.parse)

		@staticmethod
		def _stripped(value):
			"""Return `value` without surrounding whitespace, or None if it is None.

			Selector `.get()` returns None when nothing matched; passing that
			through keeps one incomplete entry from aborting the whole page.
			"""
			if value is None:
				return None
			return value.strip()

		@staticmethod
		def _counter(meta, icon_class):
			"""Return the integer displayed next to an icon inside `meta`.

			`meta` is the selector of the entry's `.meta` element and
			`icon_class` a substring of the icon's class attribute (for
			example `icon-eye`). The last text node of the `span` wrapping
			that icon is parsed as an integer. None is returned when the span
			is missing or its text is not a plain integer.
			"""
			raw = meta.xpath('span[i[contains(@class, "%s")]]/text()[last()]' % icon_class).get()
			if raw is None:
				return None
			try:
				return int(raw.strip())
			except ValueError:
				return None

		def parse(self, response):
			"""Yield one item per listed plugin, then a request for the next page."""
			for content in response.css('.entry-content'):
				meta = content.css('.meta')
				yield {
					'link': self._stripped(content.css('h4 ::attr(href)').get()),
					'title': self._stripped(content.css('h4 ::text').get()),
					'author': self._stripped(content.css('.author a ::text').get()),
					'description': content.css('.description ::text').get(),
					'views': self._counter(meta, 'icon-eye'),
					'downloads': self._counter(meta, 'icon-cloud-download'),
					'last_update': content.css('.meta ::attr(datetime)').get(),
				}
			next_page = response.css('.next-page ::attr(href)').get()
			if next_page:
				yield response.follow(next_page, self.parse)