@webknjaz
Forked from miraculixx/gmail.py
Created December 12, 2015 12:02
Scrapy spider for Gmail API, using Django AllAuth as the token source
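The spider runs like any other Scrapy spider, e.g. with scrapy crawl gmail from the Scrapy project directory, provided the Django project behind django-allauth is importable (see the settings.py excerpt at the end) and at least one Google SocialToken exists in the database.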
# gmail.py
# -*- coding: utf-8 -*-
import base64

from items import EmailItem, EmailLabelItem
from loader import JSONItemLoader
from oauth2spider import OAuth2Spider


class GmailSpider(OAuth2Spider):
    name = "gmail"
    provider = 'google'
    allowed_domains = ["googleapis.com"]
    api_url = 'https://www.googleapis.com/gmail/v1/users/me'
    credential_type = 'Bearer'
    refresh_url = 'https://www.googleapis.com/oauth2/v3/token'
    # max_items = 5
    start_verbs = [
        ('list', 'messages'),
        ('list', 'labels'),
    ]
    def parse_list_messages(self, response):
        # request every message found in this page of the list
        for message in response.obj['messages']:
            url = self.build_url_from_verb('get', 'messages')
            url = '{0}/{1}'.format(url, message['id'])
            credentials = self.credentials_from(response)
            request = self.build_request(url, credentials, 'get', 'messages')
            yield request
        # follow pagination, if any
        pageToken = response.obj.get('nextPageToken')
        if pageToken:
            credentials = self.credentials_from(response)
            url = self.build_url_from_verb('list', 'messages')
            url = '{0}?pageToken={1}'.format(url, pageToken)
            request = self.build_request(url, credentials, 'list', 'messages')
            yield request
    def parse_get_messages(self, response):
        # parse a single message
        l = JSONItemLoader(item=EmailItem(), response=response,
                           selector='payload')
        l.add_value('subject', 'headers[name=Subject].value')
        l.add_value('received', 'headers[name=Delivery-date].value')
        l.add_value('sent', 'headers[name=Date].value')
        l.add_value('from_email', 'headers[name=From].value')
        l.add_value('to_email', 'headers[name=To].value')
        l.add_value('mime', 'mimeType')
        l.add_value('text', 'parts[mimeType=text/plain].body.data')
        item = l.load_item()
        item['labels'] = '.'.join(response.obj.get('labelIds', []))
        item['is_read'] = 'UNREAD' not in item['labels']
        item['threadid'] = response.obj.get('threadId')
        # Gmail returns the body as URL-safe base64; map it back to the
        # standard alphabet before decoding,
        # see http://stackoverflow.com/a/24481560/890242
        text = item['text']
        text = text.replace('-', '+').replace('_', '/')
        item['text'] = base64.decodestring(text)
        item['text_length'] = len(item['text'])
        yield item
    def parse_list_labels(self, response):
        # parse the list of labels, one item per label
        for label in response.obj['labels']:
            l = JSONItemLoader(item=EmailLabelItem(), obj=label)
            l.add_value('itemid', 'id')
            l.add_value('name', 'name')
            l.add_value('messagesTotal', 'messagesTotal')
            l.add_value('messagesUnread', 'messagesUnread')
            item = l.load_item()
            yield item
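For reference, the list response consumed by parse_list_messages() above has roughly this shape (field names per the Gmail API users.messages.list endpoint; the values here are made up):

# hypothetical users.messages.list response body
{
    "messages": [
        {"id": "1519a0bd", "threadId": "1519a0bd"}
    ],
    "nextPageToken": "0894321",  # absent on the last page
    "resultSizeEstimate": 103
}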
# items.py
import scrapy


class EmailItem(scrapy.Item):
    user = scrapy.Field()
    threadid = scrapy.Field()
    from_email = scrapy.Field()
    to_email = scrapy.Field()
    subject = scrapy.Field()
    text = scrapy.Field()
    is_read = scrapy.Field()
    labels = scrapy.Field()
    received = scrapy.Field()
    sent = scrapy.Field()
    mime = scrapy.Field()
    text_length = scrapy.Field()


class EmailLabelItem(scrapy.Item):
    user = scrapy.Field()
    itemid = scrapy.Field()
    name = scrapy.Field()
    messagesTotal = scrapy.Field()
    messagesUnread = scrapy.Field()
# loader.py
import json
import re

from scrapy.contrib.loader import ItemLoader


class JSONItemLoader(ItemLoader):
    """
    item loader for JSON responses

    if a selector is given, it is the default selector applied to
    all subsequent add_value() calls.
    """
    def __init__(self, item=None, selector=None, response=None, obj=None,
                 **context):
        self.item = item
        self.selector = selector or ''
        self.response = response
        self.context = context
        context.update(selector=selector, response=response)
        self.obj = obj or self.get_value(self.json_obj, selector)
        self._values = {}

    @property
    def json_obj(self):
        return json.loads(self.response.body)
    def get_value(self, obj, selector):
        """
        in dict obj, access subsequent keys given by selector.

        A selector is a dot-separated sequence of keys within the dict.
        Returns '' if a key is not found.

            obj = {'foo': {'bar': 'baz'}}
            get_value(obj, 'foo.bar')
            => 'baz'

        Selectors can specify filter criteria for dicts in sublists, e.g.

            obj = {'foo': [
                {
                    'name': 'foobar',
                    'value': 'foofoo'
                },
                ...
            ]}
            get_value(obj, 'foo[name=foobar].value')
            => 'foofoo'
        """
        selector = selector or ''
        value = obj
        for key in selector.split('.'):
            # if key is empty, stop
            if not key:
                break
            # extract filter criteria, if any
            criteria = None
            if '[' in key and ']' in key:
                key, k, v = re.search(r'(.*)\[(.*)=(.*)\]', key).groups()
                criteria = k, v
            # see if we have a dict to get the value from
            if isinstance(value, dict):
                value = value.get(key, '')
            else:
                # nothing else to process, stop
                value = ''
                break
            # apply filter criteria to a list, keeping the first match
            if criteria and isinstance(value, list):
                k, v = criteria
                value = filter(lambda i: i.get(k) == v, value)
                if len(value):
                    value = value[0]
        return value
    def add_value(self, field, selector):
        """
        store the value given by selector for field
        """
        self._values.update({field: self.get_value(self.obj, selector)})

    def load_item(self):
        """
        return the item loaded with all values previously added by add_value()
        """
        for k, v in self._values.iteritems():
            self.item[k] = v
        return self.item
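A quick illustration of the selector syntax on a made-up payload (assuming items.py and loader.py above are importable):

# hypothetical usage of the JSONItemLoader selector syntax
from items import EmailItem
from loader import JSONItemLoader

payload = {'headers': [{'name': 'Subject', 'value': 'hello'}]}
l = JSONItemLoader(item=EmailItem(), obj=payload)
l.add_value('subject', 'headers[name=Subject].value')
print l.load_item()['subject']   # => 'hello'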
# oauth2spider.py
import json

from scrapy import log
import scrapy
from scrapy.http.request import Request

from scrapytest.scrapydjango import setup_django


class OAuth2Spider(scrapy.Spider):
    """
    An API spider for OAuth2 REST resources

    works with allauth SocialTokens; the user needs to have authorized
    API access via django-allauth beforehand.

    Configuration:

        api_url = the start URI
        credential_type = first string in the Authorization header
        provider = provider name of the social app
        refresh_url = the refresh url to refresh OAuth tokens

    Build API requests using

        request = self.build_request(url, credentials, api_method,
                                     api_type)

    Parse responses by implementing methods of the format

        parse_<api_method>_<api_type>

    e.g.

        parse_list_messages(response)
        parse_get_messages(response)

    response objects provide the .api_method and .api_type attributes
    (via their request), so you can build a new request from a response
    as follows:

        credentials = self.credentials_from(response)
        request = self.build_request(url, credentials, method, type)
    """
    api_url = ''
    refresh_url = ''
    provider = ''
    credential_type = 'Bearer'
    max_items = 0
    start_verbs = []
    trailing_slash = False
    def start_requests(self):
        """
        build the initial requests

        this builds initial requests from .api_url and .start_verbs.
        start_verbs is a list of tuples ('verb', 'resource'), where verb
        is the semantic method for the api and resource is the name of
        the resource, e.g.

            start_verbs = [('list', 'messages'), ('list', 'labels')]
            => GET <api_url>/messages with api_method=list, api_type=messages
            => GET <api_url>/labels with api_method=list, api_type=labels

        Using start_verbs you can have multiple types of index resources
        queried.

        If start_verbs is empty, it defaults to [('list', '<resource>')]
        where <resource> is the last path segment of api_url, e.g.

            api_url = 'some.domain.com/api/messages'
            => start_verbs = [('list', 'messages')]

        see http://doc.scrapy.org/en/latest/topics/spiders.html?highlight=spider#scrapy.spider.Spider.start_requests
        """
        for credential in self.get_credentials():
            for api_method, api_type in self.get_start_verbs():
                url = self.build_url_from_verb(api_method, api_type)
                request = self.build_request(url,
                                             api_method=api_method,
                                             api_type=api_type,
                                             credentials=credential)
                yield request

    def get_start_verbs(self):
        if not len(self.start_verbs):
            # default to listing the resource named by the last path
            # segment of api_url (see the start_requests docstring)
            path = self.api_url.rstrip('/').split('/')
            self.start_verbs = [('list', path[-1])]
        return self.start_verbs
    def build_url_from_verb(self, api_method, api_type, url=None):
        """
        from a given api_method and api_type build the actual
        url to call.

        the url is of the form <api_url>/<type>

        :param api_method: the api semantic method (e.g. list, get etc.)
        :param api_type: the api resource type
        :param url: the base url. defaults to .api_url
        """
        base_url = url or self.api_url
        if api_type and not base_url.endswith('/%s' % api_type):
            # append the resource type unless the base url already names it
            url = '{0}/{1}'.format(base_url, api_type)
        else:
            url = base_url
        if self.trailing_slash and url[-1] != '/':
            url = '%s/' % url
        return url
    def get_credentials(self):
        """
        yield all credentials with which to access the api_url

        returns an iterable of credentials, each of the format
        '<credential_type> <token>'. The default implementation returns
        credentials for all SocialTokens that match self.provider
        """
        from allauth.socialaccount.models import SocialToken
        for token in SocialToken.objects.filter(app__provider=self.provider):
            yield '{0} {1}'.format(self.credential_type, token.token)

    def refresh_kwargs(self, token):
        """
        return the dict of values required to build a
        refresh token request

        :param token: the SocialToken instance
        """
        return {
            'client_id': token.app.client_id,
            'refresh_token': token.token_secret,
            'client_secret': token.app.secret,
        }
    def refresh_token(self, request):
        """
        refresh the access token in a request

        called by error() in case of a 401 response. This assumes
        the token has expired and attempts to refresh it
        """
        from requests_oauthlib import OAuth2Session
        # get the current token value from the request's Authorization header
        token_value = self.credentials_from(request)
        token_value = token_value.replace(self.credential_type, '').strip()
        # get the SocialToken and app credentials from the django db
        setup_django()
        from allauth.socialaccount.models import SocialToken
        token = SocialToken.objects.get(token=token_value)
        app = token.app
        # create an oauth2 session and refresh the token
        # see http://requests-oauthlib.readthedocs.org/en/latest/oauth2_workflow.html#third-recommended-define-automatic-token-refresh-and-update
        # and http://requests-oauthlib.readthedocs.org/en/latest/api.html#requests_oauthlib.OAuth2Session.refresh_token
        client = OAuth2Session(app.client_id,
                               token={'access_token': token.token,
                                      'token_type': self.credential_type})
        resp = client.refresh_token(self.refresh_url,
                                    **self.refresh_kwargs(token))
        # we finally have a new token, save it
        token.token = resp['access_token']
        token.save()
        # rebuild the request with the new token and reschedule it
        credentials = '{0} {1}'.format(self.credential_type, token.token)
        request = self.build_request(request.url,
                                     api_method=request.api_method,
                                     api_type=request.api_type,
                                     credentials=credentials)
        return [request]
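    # For reference, a successful refresh response is a token dict roughly
    # like the following (shape per OAuth2; the values are made up):
    #
    #   {'access_token': 'ya29.new-token', 'token_type': 'Bearer',
    #    'expires_in': 3600}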
    def error(self, failure):
        """
        process a failed request

        if the error is a 401 response, assume we need a new token and
        reschedule via refresh_token(). other errors are only logged.
        """
        try:
            response = failure.value.response
        except AttributeError:
            pass
        else:
            if response.status == 401:
                # return the rebuilt requests so scrapy schedules them
                return self.refresh_token(failure.request)
        self.log(failure.value, level=log.ERROR)
    def build_request(self, url, credentials=None, api_method=None,
                      api_type=None):
        """
        build a request object

        :param url: the url to access
        :param credentials: the credentials (set in the Authorization header)
        :param api_method: semantic api method (e.g. list, get)
        :param api_type: semantic api type (e.g. messages, threads)
        """
        request = scrapy.Request(url,
                                 headers={'Authorization': credentials},
                                 errback=self.error,
                                 callback=self.parse)
        request.api_method = api_method
        request.api_type = api_type
        return request
    def credentials_from(self, request_or_response, header=False):
        """
        get the credentials from the Authorization header.

        this ensures we can build subsequent Requests from a previous
        request or response.

        :return: Authorization header value (header=True => the full header)
        """
        rr = request_or_response
        request = rr if isinstance(rr, Request) else rr.request
        credentials = request.headers.get('Authorization')
        if header:
            credentials = {'Authorization': credentials}
        return credentials
    def parser_for(self, request):
        """
        return the method that can parse the response to this request.
        defaults to

            parse_<api_method>_<api_type>

        where api_method and api_type are the attributes set on the
        request by build_request()
        """
        url_type = 'parse_{0}_{1}'.format(request.api_method,
                                          request.api_type)
        return getattr(self, url_type)

    def parse(self, response):
        """
        parse an api response.

        1. deserialize the body into a python dict, available as response.obj
        2. find the parser for the response by checking the request's
           api_method and api_type, then yield whatever it produces,
           up to max_items items (0 means no limit)
        """
        response.obj = json.loads(response.body)
        parser = self.parser_for(response.request)
        self.log('%s - %s' % (parser.__name__, response), level=log.DEBUG)
        for i, r in enumerate(parser(response)):
            self.log('> %s - %s' % (parser.__name__, r), level=log.DEBUG)
            if self.max_items == 0 or i < self.max_items:
                if self.trailing_slash and isinstance(r, Request):
                    if r.url[-1] != '/':
                        # Request.url is read-only, rebuild the request
                        r = self.build_request('%s/' % r.url,
                                               credentials=self.credentials_from(r),
                                               api_method=r.api_method,
                                               api_type=r.api_type)
                yield r
# add to your scrapy app's settings.py
from app import settings as dj_app_settings
from django.conf import settings as dj_conf

if not dj_conf.configured:
    # https://docs.djangoproject.com/en/dev/topics/settings/#using-settings-without-setting-django-settings-module
    dj_conf.configure(**dj_app_settings.__dict__)
    from django.apps import apps
    apps.populate(dj_conf.INSTALLED_APPS)
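The setup_django() helper imported in oauth2spider.py is not included in this gist; a minimal sketch of what it might look like, reusing the bootstrap above (the module path is taken from the import, everything else is an assumption):

# scrapytest/scrapydjango.py (hypothetical sketch, not part of the gist)
def setup_django():
    from app import settings as dj_app_settings
    from django.conf import settings as dj_conf
    if not dj_conf.configured:
        dj_conf.configure(**dj_app_settings.__dict__)
        from django.apps import apps
        apps.populate(dj_conf.INSTALLED_APPS)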