Skip to content

Instantly share code, notes, and snippets.

@gileno
Last active September 18, 2015 22:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gileno/3219ab7caf5be6da5478 to your computer and use it in GitHub Desktop.
Save gileno/3219ab7caf5be6da5478 to your computer and use it in GitHub Desktop.
Blog post VII
# -*- coding: utf-8 -*-
import rethinkdb as r
class RethinkdbPipeline(object):
conn = None
rethinkdb_settings = {}
@classmethod
def from_crawler(cls, crawler):
settings = crawler.settings
rethinkdb_settings = settings.get('RETHINKDB', {})
return cls(rethinkdb_settings)
def __init__(self, rethinkdb_settings):
self.rethinkdb_settings = rethinkdb_settings
def open_spider(self, spider):
if self.rethinkdb_settings:
self.table_name = self.rethinkdb_settings.pop('table_name')
self.db_name = self.rethinkdb_settings['db']
self.conn = r.connect(**self.rethinkdb_settings)
table_list = r.db(self.db_name).table_list().run(
self.conn
)
if self.table_name not in table_list:
r.db(self.db_name).table_create(self.table_name).run(self.conn)
def close_spider(self, spider):
if self.conn:
self.conn.close()
def process_item(self, item, spider):
if self.conn:
r.table(self.table_name).insert(item).run(self.conn)
return item
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment