Created
August 25, 2016 08:11
-
-
Save briehanlombaard/b19885f99b9ee26363070fd9883695b9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scrapy | |
from io import BytesIO | |
from PIL import Image, ImageOps | |
from scrapy.pipelines.images import ImagesPipeline | |
class ArticleItem(scrapy.Item):
    """Item emitted by the article spider; it carries only image-related fields."""

    # List of image URLs to be fetched for the article.
    image_urls = scrapy.Field()

    # NOTE(review): presumably populated by the images pipeline with the
    # download results (Scrapy's default result field) -- confirm.
    images = scrapy.Field()
class ArticleImagesPipeline(ImagesPipeline):
    """Images pipeline whose thumbnails are cropped to exact dimensions.

    Overrides ``convert_image`` so that a requested ``size`` yields an image
    of exactly that size (via ``ImageOps.fit``) rather than one that merely
    fits inside it.
    """

    def convert_image(self, image, size=None):
        """Convert ``image`` to RGB, optionally fit it to ``size``, and encode as JPEG.

        Args:
            image: The PIL image to convert.
            size: Optional ``(width, height)`` tuple. When truthy, the image
                is scaled and cropped to exactly these dimensions.

        Returns:
            A ``(image, buf)`` tuple: the converted PIL image and a
            ``BytesIO`` holding its JPEG encoding. The buffer is not rewound
            after writing.
        """
        if image.format == 'PNG' and image.mode == 'RGBA':
            # Flatten transparency onto a white canvas, using the image
            # itself as the paste mask, before dropping the alpha channel.
            background = Image.new('RGBA', image.size, (255, 255, 255))
            background.paste(image, image)
            image = background.convert('RGB')
        elif image.mode != 'RGB':
            image = image.convert('RGB')

        if size:
            # Use ImageOps.fit instead of image.thumbnail so that we get an
            # image with the exact dimensions given by `size`.
            # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS names
            # the same resampling filter and also exists in older releases.
            image = ImageOps.fit(image, size, Image.LANCZOS)

        buf = BytesIO()
        image.save(buf, 'JPEG')
        return image, buf
class ArticleSpider(scrapy.Spider):
    """Spider that requests a single article page and emits one image item.

    The spider-level settings route items through ``ArticleImagesPipeline``,
    store downloads under ``articles`` and request two thumbnail sizes.
    """

    name = 'article'

    custom_settings = {
        'AUTOTHROTTLE_ENABLED': True,
        'IMAGES_STORE': 'articles',
        # Thumbnail name -> (width, height).
        'IMAGES_THUMBS': {
            'small': (290, 150),
            'medium': (610, 320),
        },
        'ITEM_PIPELINES': {
            'spider.ArticleImagesPipeline': 100,
        },
    }

    def start_requests(self):
        """Yield the one request that starts the crawl."""
        article_url = u'http://www.latimes.com/travel/la-trb-ellis-island-video-20160621-premiumvideo.html'
        yield scrapy.Request(article_url)

    def parse(self, response):
        """Yield one ``ArticleItem`` with a fixed image URL.

        The ``response`` content is not inspected; the image URL is
        hard-coded.
        """
        image_url = 'http://www.trbimg.com/img-57699896/turbine/la-trb-ellis-island-video-20160621'
        item = ArticleItem(image_urls=[image_url])
        yield item
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment