Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from scrapy.spider import Spider
from scrapy.selector import Selector
from yp.items import YpItem
class YpSpider(Spider):
    """Scrapy spider that collects church listings from yellowpages.com.

    Crawling starts from a single search-results page (churches in
    Ft. Worth, TX) and produces one ``YpItem`` per listing found on it.
    """

    name = "yp"
    allowed_domains = ["yellowpages.com"]
    start_urls = [
        "http://www.yellowpages.com/ft-worth-tx/churches?g=ft.%20worth%2C%20tx&q=churches"
    ]

    # (item field, XPath relative to one listing node) pairs. The XPaths
    # start with ``.//`` so they are evaluated against the listing they are
    # applied to rather than against the whole document.
    _FIELD_XPATHS = (
        ('name', './/span[@itemprop="name"]/text()'),
        ('streetAddress', './/span[@itemprop="streetAddress"]/text()'),
        ('addressCity', './/span[@itemprop="addressLocality"]/text()'),
        ('addressState', './/span[@itemprop="addressRegion"]/text()'),
        ('addressZip', './/span[@itemprop="postalCode"]/text()'),
        ('phone', './/li[@itemprop="telephone"]/text()'),
    )

    def parse(self, response):
        """Extract one item per ``div.info`` listing in the response.

        Args:
            response: The downloaded search-results page.

        Returns:
            A list of ``YpItem`` objects. Each field holds the list of text
            nodes matched inside that listing (empty if nothing matched).
        """
        sel = Selector(response)
        main_content = sel.xpath('//div[@id="main-content"]')
        items = []
        # ``.xpath`` replaces the deprecated ``.select`` selector method.
        for listing in main_content.xpath('.//div[@class="info"]'):
            item = YpItem()
            for field, query in self._FIELD_XPATHS:
                # Query the individual listing, not the whole container;
                # otherwise every item would hold the data of all listings.
                item[field] = listing.xpath(query).extract()
            items.append(item)
        return items
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.