# amirulasyraf88/spider.py

## spider.py
#Extracting first page of http://www.mudah.my/malaysia/cars-for-sale
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.selector import Selector
from items import NewsItem

filename = "output.csv"  # output file, relative to the current working directory
fields = ["headline", "price", "url"]  # column order of the header and of every row

# Write the header row once. The file is opened in append mode so rows from
# earlier runs are kept; the header is added only while the file is still
# empty, otherwise every import of this module would insert another header
# line in the middle of the data.
with open(filename, 'a+') as f:
    f.seek(0, 2)  # go to the end explicitly so tell() reports the file size
    if f.tell() == 0:
        f.write("{}\n".format(';'.join(str(field) for field in fields)))

class BbcSpider(CrawlSpider):
    """Spider that appends the car ads of a mudah.my listing page to ``filename``.

    Every ad becomes one semicolon-separated line whose columns follow the
    order given in ``fields``.

    NOTE(review): ``CrawlSpider`` uses ``parse`` itself to apply its ``rules``.
    No rules are defined here, so overriding ``parse`` works, but presumably a
    plain ``Spider`` was intended -- confirm before changing the base class.
    """

    name = "mudahspider"
    allowed_domains = ["mudah.my"]
    start_urls = ('http://www.mudah.my/malaysia/cars-for-sale',)

    def parse(self, response):
        """Append one row per ad found in ``response`` to the output file.

        The headline, price and URL of the ads are extracted as three
        parallel lists and written out position by position.

        :param response: page response to extract the ads from.
        """
        hxs = Selector(response)
        titles = hxs.xpath('//div[@id="ContainerMain"]')

        # Append mode keeps the header and any rows written earlier.
        with open(filename, 'a+') as f:
            for title in titles:
                # NOTE(review): these expressions start with "//", so they
                # search the whole document rather than only inside ``title``;
                # ".//" would make them relative. Left unchanged because the
                # page structure cannot be verified from here.
                story = NewsItem()
                story['url'] = title.xpath('//div[@class="top_params"]/h2[@class="list_title truncate"]/a/@href').extract()
                story['headline'] = title.xpath('//div[@class="top_params"]/h2[@class="list_title truncate"]/a/@title').extract()
                story['price'] = title.xpath('//div[@class="middle_params"]/div[@class="ads_price"]/meta[@itemprop="price"]/@content').extract()
                # zip() walks the columns in lockstep and includes the last
                # ad, which the former ``range(0, len(...) - 1)`` loop dropped.
                # It also stops at the shortest list instead of raising
                # IndexError when one column has fewer entries; in that case
                # values may be paired with the wrong ad.
                for row in zip(*(story[field] for field in fields)):
                    f.write("{}\n".format(';'.join(str(value) for value in row)))
	#Extracting first page of http://www.mudah.my/malaysia/cars-for-sale
	from scrapy.spiders import CrawlSpider, Rule
	from scrapy.linkextractors import LinkExtractor
	from scrapy.selector import Selector
	from items import NewsItem

	filename = "output.csv"  # output file, relative to the current working directory
	fields = ["headline", "price", "url"]  # column order of the header and of every row

	# Write the header row once. The file is opened in append mode so rows from
	# earlier runs are kept; the header is added only while the file is still
	# empty, otherwise every execution of this code would insert another header
	# line in the middle of the data.
	with open(filename, 'a+') as f:
		f.seek(0, 2)  # go to the end explicitly so tell() reports the file size
		if f.tell() == 0:
			f.write("{}\n".format(';'.join(str(field) for field in fields)))

	class BbcSpider(CrawlSpider):
		"""Spider that appends the car ads of a mudah.my listing page to ``filename``.

		Every ad becomes one semicolon-separated line whose columns follow the
		order given in ``fields``.

		NOTE(review): this repeats the ``BbcSpider`` class defined earlier in
		this file; the nesting of this copy had been lost and is restored
		here. Consider keeping only one copy.

		NOTE(review): ``CrawlSpider`` uses ``parse`` itself to apply its
		``rules``. No rules are defined here, so overriding ``parse`` works,
		but presumably a plain ``Spider`` was intended -- confirm before
		changing the base class.
		"""

		name = "mudahspider"
		allowed_domains = ["mudah.my"]
		start_urls = ('http://www.mudah.my/malaysia/cars-for-sale',)

		def parse(self, response):
			"""Append one row per ad found in ``response`` to the output file.

			The headline, price and URL of the ads are extracted as three
			parallel lists and written out position by position.

			:param response: page response to extract the ads from.
			"""
			hxs = Selector(response)
			titles = hxs.xpath('//div[@id="ContainerMain"]')

			# Append mode keeps the header and any rows written earlier.
			with open(filename, 'a+') as f:
				for title in titles:
					# NOTE(review): these expressions start with "//", so they
					# search the whole document rather than only inside
					# ``title``; ".//" would make them relative. Left unchanged
					# because the page structure cannot be verified from here.
					story = NewsItem()
					story['url'] = title.xpath('//div[@class="top_params"]/h2[@class="list_title truncate"]/a/@href').extract()
					story['headline'] = title.xpath('//div[@class="top_params"]/h2[@class="list_title truncate"]/a/@title').extract()
					story['price'] = title.xpath('//div[@class="middle_params"]/div[@class="ads_price"]/meta[@itemprop="price"]/@content').extract()
					# zip() walks the columns in lockstep and includes the last
					# ad, which the former ``range(0, len(...) - 1)`` loop
					# dropped. It also stops at the shortest list instead of
					# raising IndexError when one column has fewer entries; in
					# that case values may be paired with the wrong ad.
					for row in zip(*(story[field] for field in fields)):
						f.write("{}\n".format(';'.join(str(value) for value in row)))