Skip to content

Instantly share code, notes, and snippets.

@ownport
Last active August 29, 2015 14:27
Show Gist options
  • Save ownport/76ba25d0437f9f13240d to your computer and use it in GitHub Desktop.
Save ownport/76ba25d0437f9f13240d to your computer and use it in GitHub Desktop.
Scrapy: rethinkdb pipeline, base class
# Based on:
# - https://github.com/sprij/scrapy-rethinkdb
#
# updated on 2015-08-23
import urlparse
import rethinkdb as r
from rethinkdb.ast import RqlQuery
class RethinkDBPipeline(object):
    """Base class for Scrapy pipelines that store data in RethinkDB.

    The class opens one connection when it is instantiated and offers
    helpers to run ReQL statements and to insert documents into a table
    of the configured database.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """ Gets settings for the pipeline from the crawler.

        The ``RETHINKDB_URI`` setting is parsed as a URI: its network
        location becomes the ``server`` entry and its path, with every
        slash removed, becomes the ``db`` entry of the settings dict that
        is handed to the constructor.

        @param crawler: crawler; only ``crawler.settings.get`` is used
        @return: a new instance of ``cls``
        @raise ValueError: if ``RETHINKDB_URI`` is missing or empty
        """
        uri = crawler.settings.get('RETHINKDB_URI', None)
        if not uri:
            # Fail early with a clear message instead of handing None to
            # urlparse and connecting with meaningless values.
            raise ValueError('The RETHINKDB_URI setting is required')
        parsed = urlparse.urlparse(uri)
        return cls({'server': parsed.netloc, 'db': parsed.path.replace('/', '')})

    def __init__(self, settings):
        """ Validates the settings and opens the connection.

        @param settings: dict with a ``server`` entry (``host`` or
            ``host:port``); the ``db`` entry is read by ``insert_item``
        @raise ValueError: if ``settings`` is not a dict
        @raise KeyError: if ``settings`` has no ``server`` entry
        """
        if not isinstance(settings, dict):
            raise ValueError('The connection settings should be dictionary type, %s' % type(settings))
        self.settings = settings
        # A URI netloc may carry a port; r.connect takes host and port as
        # separate arguments, so the combined string must be split first.
        host, port = self._split_server(self.settings['server'])
        if port is None:
            self._conn = r.connect(host)
        else:
            self._conn = r.connect(host, port)

    @staticmethod
    def _split_server(server):
        """ Splits ``host[:port]`` into a ``(host, port)`` tuple.

        ``port`` is an int, or None when ``server`` carries no numeric
        port. Bracketed IPv6 literals (``[::1]:28015``) lose their
        brackets. Anything ambiguous (a bare IPv6 address, a value with
        user info, an empty host) is returned unchanged with port None.
        """
        host, sep, port = server.rpartition(':')
        if not sep or not port.isdigit():
            return server, None
        if host.startswith('[') and host.endswith(']'):
            return host[1:-1], int(port)
        if not host or ':' in host or '@' in host:
            return server, None
        return host, int(port)

    def execute(self, stmt):
        """ Runs a ReQL statement on the pipeline's connection.

        @param stmt: an ``RqlQuery`` instance
        @return: whatever ``stmt.run`` returns
        @raise ValueError: if ``stmt`` is not an ``RqlQuery``
        """
        if not isinstance(stmt, RqlQuery):
            raise ValueError('Expecting <RqlQuery> instance, got <%s>' % type(stmt))
        return stmt.run(self._conn)

    def insert_item(self, tablename, data):
        """ Inserts ``data`` into ``tablename`` of the configured database.

        @param tablename: name of the target table
        @param data: value passed to the table's ``insert``
        @return: the result of running the insert, so callers can inspect it
        """
        table = r.db(self.settings['db']).table(tablename)
        return self.execute(table.insert(data))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment