Skip to content

Instantly share code, notes, and snippets.

@nikhilpi-zz
nikhilpi-zz / buzzfeedNews_spider.py
Created April 2, 2015 01:46
Scrapy Scraper for Buzzfeed
import scrapy
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.lxmlhtml import LxmlLinkExtractor
from buzzLinks.items import BuzzlinksItem
from urlparse import urlparse
# Spider: CrawlSpider subclass named "buzzfeedNews", with allowed_domains set to buzzfeed.com.
class DmozSpider(CrawlSpider):
name = "buzzfeedNews"
allowed_domains = ["buzzfeed.com"]