Skip to content

Instantly share code, notes, and snippets.

# -*- coding: utf-8 -*-
# Define your item pipelines here
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
class AmazonPipeline(object):
def process_item(self, item, spider):
return item
[
{"product_category": "Electronics,Computers & Accessories,Data Storage,External Hard Drives", "product_sale_price": "$949.95", "product_name": "G-Technology G-SPEED eS PRO High-Performance Fail-Safe RAID Solution for HD/2K Production 8TB (0G01873)", "product_availability": "Only 1 left in stock."},
{"product_category": "Electronics,Computers & Accessories,Data Storage,USB Flash Drives", "product_sale_price": "", "product_name": "G-Technology G-RAID with Removable Drives High-Performance Storage System 4TB (Gen7) (0G03240)", "product_availability": "Available from these sellers."},
{"product_category": "Electronics,Computers & Accessories,Data Storage,USB Flash Drives", "product_sale_price": "$549.95", "product_name": "G-Technology G-RAID USB Removable Dual Drive Storage System 8TB (0G04069)", "product_availability": "Only 1 left in stock."},
{"product_category": "Electronics,Computers & Accessories,Data Storage,External Hard Drives", "product_sale_price": "$89.95", "product_name": "G-Technology G-DRIVE ev U
# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'amazon.pipelines.AmazonPipeline': 300,
}
# -*- coding: utf-8 -*-
import json
import requests
import re
import random
import urllib
import lxml.html
import bs4
import sys
reload(sys)