Skip to content

Instantly share code, notes, and snippets.

# Scrapy settings for ProjetVinNicolas4 project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# http://doc.scrapy.org/topics/settings.html
#
BOT_NAME = 'ProjetVinNicolas4'
#!/usr/bin/python
#-*- coding: utf-8 -*-
from ProjetVinNicolas4.items import Projetvinnicolas4Item
from scrapy.contrib.exporter import CsvItemExporter
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.pipeline.images import ImagesPipeline
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import HtmlXPathSelector
import lxml
import lxml.etree
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/topics/items.html
from scrapy.item import Item, Field
class Projetvinnicolas4Item(Item):
    """Scrapy item declaring the fields collected by the ProjetVinNicolas4 crawl."""

    # Presumably the wine's vintage ("millesime" in French) -- confirm against the spider.
    millesime = Field()
    # NOTE(review): presumably the keeping time or storage temperature -- confirm.
    temp_Conserv = Field()
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/topics/items.html
from scrapy.item import Item, Field
class Projetvinnicolas3Item(Item):
    """Scrapy item declaring the fields collected by the ProjetVinNicolas3 crawl."""

    # Presumably the wine's name ("nom du vin" in French) -- confirm against the spider.
    nomVin = Field()
    # NOTE(review): presumably the keeping time or storage temperature -- confirm.
    temp_Conserv = Field()
@nahali
nahali / WineSpider3.py
Created September 5, 2013 08:34
wine crawling from www.nicolas.com
#!/usr/bin/python
#-*- coding: utf-8 -*-
from ProjetVinNicolas3.items import Projetvinnicolas3Item
from scrapy.contrib.exporter import CsvItemExporter
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import HtmlXPathSelector
import lxml
import lxml.etree
import lxml.html
@nahali
nahali / error messages
Created September 4, 2013 10:32
after typing: scrapy crawl vinNico3 -o fahed1.csv -t csv
37, in maybeDeferred
result = f(*args, **kw)
File "C:\Python27\lib\site-packages\scrapy-0.16.5-py2.7.egg\scrapy\xli
b\pydispatch\robustapply.py", line 47, in robustApply
return receiver(*arguments, **named)
File "C:\Python27\lib\site-packages\scrapy-0.16.5-py2.7.egg\scrapy\con
trib\feedexport.py", line 190, in item_scraped
slot = self.slots[spider]
exceptions.KeyError: <MySpider 'vinNico3' at 0x3d27690>
@nahali
nahali / settings.py
Last active December 22, 2015 06:59
# Scrapy settings for the ProjetVinNicolas3 project.
BOT_NAME = 'ProjetVinNicolas3'

# Package searched for spiders, and the package new spiders are generated into.
SPIDER_MODULES = ['ProjetVinNicolas3.spiders']
NEWSPIDER_MODULE = 'ProjetVinNicolas3.spiders'

# NOTE(review): both the stock ImagesPipeline and a pipeline named
# MyImagesPipeline are enabled. If the latter subclasses the former, each
# image would be requested by two pipelines -- confirm whether only the
# custom one is wanted.
ITEM_PIPELINES = [
    'scrapy.contrib.pipeline.images.ImagesPipeline',
    'ProjetVinNicolas3.pipelines.MyImagesPipeline',
]

# Original (misspelled) name, kept so any code reading it keeps working.
IMAGE_STORE = '/img'
# The images pipeline reads IMAGES_STORE (plural); without it the pipeline
# has no storage location configured.
IMAGES_STORE = IMAGE_STORE
@nahali
nahali / pipelines.py
Last active December 22, 2015 06:59
from scrapy.contrib.pipeline.images import ImagesPipeline
from scrapy.exceptions import DropItem
from scrapy.http import Request
class MyImagesPipeline(ImagesPipeline):
    """Images pipeline that requests every URL listed in an item's ``image_urls``."""

    def get_media_requests(self, item, info):
        """Yield one download ``Request`` per URL in the item's ``image_urls``.

        Args:
            item: The scraped item whose ``image_urls`` entry is iterated.
            info: Not used by this method.

        Yields:
            A ``Request`` built from each image URL.
        """
        # A missing or empty ``image_urls`` entry yields no requests instead
        # of raising KeyError for items that carry no images.
        for image_url in item.get('image_urls') or []:
            yield Request(image_url)