Last active
August 29, 2015 14:27
-
-
Save ownport/76ba25d0437f9f13240d to your computer and use it in GitHub Desktop.
Scrapy: rethinkdb pipeline, base class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on:
# - https://github.com/sprij/scrapy-rethinkdb
#
# Updated on 2015-08-23
import urlparse | |
import rethinkdb as r | |
from rethinkdb.ast import RqlQuery | |
class RethinkDBPipeline(object):
    """ Base class for Scrapy item pipelines that store data in RethinkDB.

    The pipeline opens one connection when it is constructed and runs every
    query through it. Subclasses are expected to call insert_item() (or
    execute()) from their own item-processing code.
    """

    # Port used when the server string does not carry an explicit one.
    DEFAULT_PORT = 28015

    @classmethod
    def from_crawler(cls, crawler):
        """ Gets settings for the pipeline from the crawler.

        Reads the RETHINKDB_URI setting; its network location becomes the
        'server' entry and its path (with slashes removed) the 'db' entry.

        @param crawler: crawler
        @return: a new pipeline instance
        @raise ValueError: if RETHINKDB_URI is missing or empty
        """
        uri = crawler.settings.get('RETHINKDB_URI', None)
        if not uri:
            # Fail early with a readable message instead of letting the URL
            # parser choke on None.
            raise ValueError(
                'The RETHINKDB_URI setting is required, got %r' % (uri,))
        parsed = urlparse.urlparse(uri)
        return cls({'server': parsed.netloc,
                    'db': parsed.path.replace('/', '')})

    def __init__(self, settings):
        """ Stores the connection settings and connects to the server.

        @param settings: dict with a 'server' entry ('host' or 'host:port');
                         insert_item() additionally reads the 'db' entry
        @raise ValueError: if settings is not a dict
        """
        if not isinstance(settings, dict):
            raise ValueError('The connection settings should be dictionary type, %s' % type(settings))
        self.settings = settings
        # The server string may carry a port; the driver wants them apart.
        host, port = self._split_server(self.settings['server'])
        self._conn = r.connect(host=host, port=port)

    @staticmethod
    def _split_server(server, default_port=DEFAULT_PORT):
        """ Splits 'host[:port]' into a (host, port) tuple.

        @param server: server string, e.g. 'localhost' or 'localhost:28015'
        @param default_port: port returned when the string has none
        @return: (host, port) with port as an int
        """
        host, sep, port = server.rpartition(':')
        if not (sep and port.isdigit()):
            return server, default_port
        if ':' in host and not host.endswith(']'):
            # More colons without brackets: the colon belongs to the address
            # itself, not to a port suffix.
            return server, default_port
        return host, int(port)

    def execute(self, stmt):
        """ Runs a query on the pipeline connection.

        @param stmt: RqlQuery instance to run
        @return: whatever the query's run() returns
        @raise ValueError: if stmt is not an RqlQuery
        """
        if not isinstance(stmt, RqlQuery):
            raise ValueError('Expecting <RqlQuery> instance, got <%s>' % type(stmt))
        return stmt.run(self._conn)

    def insert_item(self, tablename, data):
        """ Inserts data into a table of the configured database.

        @param tablename: name of the target table
        @param data: document(s) passed to insert()
        @return: the result of the insert query, so callers can inspect it
        """
        query = r.db(self.settings['db']).table(tablename).insert(data)
        return self.execute(query)

    def close_spider(self, spider):
        """ Closes the connection opened in __init__.

        @param spider: the spider being closed (unused)
        """
        self._conn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment