Skip to content

Instantly share code, notes, and snippets.

@saurabhwahile
Created June 18, 2017 04:44
Show Gist options
  • Save saurabhwahile/9ace5930b7e5c49d1b76f8d9cf52f90b to your computer and use it in GitHub Desktop.
Save saurabhwahile/9ace5930b7e5c49d1b76f8d9cf52f90b to your computer and use it in GitHub Desktop.
Crawler For FBO.gov
# -*- coding: utf-8 -*-
import datetime
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
class FBOSpider(scrapy.Spider):
    """Spider that collects opportunity descriptions from FBO.gov listings.

    The crawl has three stages:

    1. ``start_requests`` requests a fixed number of listing pages.
    2. ``parse`` follows the link found in the first cell of every listing
       row (elements whose ``id`` starts with ``row_``).
    3. ``parse_opportunity`` emits the description element of each
       opportunity page as an item.
    """

    name = "FBOSpider"
    start_url = "https://www.fbo.gov/index?s=opportunity&mode=list&tab=list&pageID={page_no}"
    # Number of listing pages requested by ``start_requests``. Kept as a class
    # attribute so it can be overridden per run (``-a max_pages=5``) or in a
    # subclass; the default matches the previously hard-coded value.
    max_pages = 20

    def start_requests(self):
        """Yield one request per listing page, for pages ``0 .. max_pages - 1``."""
        # Spider arguments passed with ``-a`` arrive as strings, hence int().
        for page_no in range(int(self.max_pages)):
            yield scrapy.Request(
                url=self.start_url.format(page_no=page_no),
                callback=self.parse,
            )

    def parse(self, response):
        """Yield a request for every opportunity linked from a listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            A ``scrapy.Request`` per listing row, handled by
            ``parse_opportunity``.
        """
        for opportunity in response.xpath('//*[starts-with(@id, "row_")]/td[1]/a/@href'):
            # NOTE(review): the href is appended verbatim to the index URL, so
            # this presumably expects hrefs of the form "?s=...&id=..." --
            # confirm against the live markup.
            yield scrapy.Request(
                url="https://www.fbo.gov/index" + opportunity.extract(),
                callback=self.parse_opportunity,
            )

    def parse_opportunity(self, response):
        """Yield a single item holding the opportunity's description markup.

        Args:
            response: The downloaded opportunity detail page.

        Yields:
            A dict whose ``"response"`` key holds the list of strings
            extracted for the description form-field element (an empty list
            when the element is not present on the page).
        """
        yield {
            "response": response.xpath(
                '//*[@id="so_formfield_dnf_class_values_procurement_notice__description_"]'
            ).extract()
        }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment