nramirezuy/gist:e306c7e849a54e1909a7

## gistfile1.py
import collections, json
from urllib import urlretrieve
from urlparse import urljoin
from csv import DictReader, reader as csv_reader

import scrapinghub

from project.settings import SH_APIKEY


# Root URL that `_download` joins relative feed names onto with `urljoin`.
# The trailing slash matters: without it `urljoin` would replace the last
# path segment ("feeds") instead of appending the feed name below it.
BASE_URL = 'http://files.scrapinghub.com.s3.amazonaws.com/feeds/'


def _download(url, _fname=None):
    """Fetch a feed file and return the path of the local copy.

    :param url: feed location; it is joined onto ``BASE_URL`` with
        ``urljoin`` before being requested.
    :param _fname: destination path handed to ``urlretrieve``.  With the
        default ``None``, ``urlretrieve`` picks a temporary file itself.
    :returns: the local file name reported by ``urlretrieve``.
    """
    absolute_url = urljoin(BASE_URL, url)
    # urlretrieve answers with (local_filename, headers); only the
    # file name is of interest to the callers.
    saved_path = urlretrieve(absolute_url, _fname)[0]
    return saved_path


def _parse(_file, ofmt):
    """Return an iterator over the records stored in an open feed file.

    The input format is taken from the text after the last ``.`` in
    ``_file.name``:

    * ``csv`` with ``ofmt='dict'``: a ``DictReader`` (one mapping per
      row, keyed by the header line).
    * ``csv`` with ``ofmt='list'``: a plain csv reader positioned after
      the header line, so only data rows are produced.
    * ``jl`` with ``ofmt='dict'``: one decoded JSON document per
      non-blank line.

    :param _file: open file object exposing a ``name`` attribute.
    :param ofmt: requested row format, ``'dict'`` or ``'list'``.
    :raises ValueError: for an unsupported extension or an unsupported
        ``ofmt`` for that extension.  ``ValueError`` derives from
        ``Exception``, so callers catching ``Exception`` keep working.
    """
    fext = _file.name.split('.')[-1]

    if fext == 'csv':
        if ofmt == 'dict':
            return DictReader(_file)
        if ofmt == 'list':
            rows = csv_reader(_file)
            # Drop the header line.  The next() builtin works on both
            # Python 2.6+ and 3, and the default keeps an empty file from
            # raising StopIteration (which would leak into the calling
            # generators and silently end or break them).
            next(rows, None)
            return rows
        raise ValueError('Not supported ofmt')

    if fext == 'jl':
        if ofmt == 'dict':
            # Blank lines (e.g. a trailing newline at the end of the feed)
            # carry no document and would make json.loads fail.
            return (json.loads(line) for line in _file if line.strip())
        raise ValueError('Not supported ofmt')

    raise ValueError('Not supported extension')


def _to_unicode(data, encoding='utf-8'):
    """Recursively decode every byte string inside *data* to unicode text.

    * byte strings are decoded with *encoding*;
    * text that is already unicode is returned unchanged;
    * mappings are rebuilt as a plain ``dict`` with converted keys and
      values;
    * any other iterable is rebuilt by calling its own type on the
      converted elements, so ``list`` stays ``list``, ``tuple`` stays
      ``tuple``, and so on;
    * everything else (numbers, ``None``, ...) is returned as is.

    :param data: value to convert.
    :param encoding: codec used for byte strings (default ``'utf-8'``).
    :returns: the converted structure.
    :raises UnicodeDecodeError: if a byte string is not valid *encoding*.
    """
    # The ABCs live in collections.abc on Python 3 (the aliases in
    # ``collections`` were removed in 3.10) and in ``collections`` on
    # Python 2.  The import is local so the block stays self-contained.
    try:
        from collections.abc import Iterable, Mapping
    except ImportError:  # Python 2
        from collections import Iterable, Mapping

    if isinstance(data, bytes):  # ``bytes`` is ``str`` on Python 2
        return data.decode(encoding)
    if isinstance(data, type(u'')):
        # Already text.  Calling .decode() on a Python 2 ``unicode`` first
        # ASCII-encodes it implicitly and therefore crashes on any
        # non-ASCII character (json.loads always returns ``unicode``).
        # Text must also be stopped here because it is itself iterable.
        return data
    if isinstance(data, Mapping):
        # Each (key, value) pair is a tuple and is converted by the
        # iterable branch below.
        return dict(_to_unicode(pair, encoding) for pair in data.items())
    if isinstance(data, Iterable):
        return type(data)(_to_unicode(elem, encoding) for elem in data)
    return data


def ondemand(ofmt='dict', _url='feed_1.csv', _fname=None):
    """Generate the records of the on-demand feed, decoded to unicode.

    Being a generator, nothing is downloaded until the first record is
    requested; the local file stays open until iteration finishes.

    :param ofmt: row format forwarded to ``_parse``.
    :param _url: feed location forwarded to ``_download``.
    :param _fname: optional local destination forwarded to ``_download``.
    """
    local_path = _download(_url, _fname)
    with open(local_path) as feed:
        for record in _parse(feed, ofmt):
            yield _to_unicode(record)


def companies(ofmt='dict', _url='feed_2.csv', _fname=None):
    """Generate the records of the companies feed, decoded to unicode.

    Being a generator, nothing is downloaded until the first record is
    requested; the local file stays open until iteration finishes.

    :param ofmt: row format forwarded to ``_parse``.
    :param _url: feed location forwarded to ``_download``.
    :param _fname: optional local destination forwarded to ``_download``.
    """
    local_path = _download(_url, _fname)
    with open(local_path) as feed:
        for record in _parse(feed, ofmt):
            yield _to_unicode(record)


def items(key):
    """Generate the items of the job identified by *key*.

    :param key: job key string.  The text before its first ``/`` is used
        to index the ``scrapinghub.Connection`` (presumably the project
        id -- confirm against the scrapinghub client documentation); the
        whole key is then passed to ``.job()``.
    """
    connection = scrapinghub.Connection(SH_APIKEY)
    project_ref = key.split('/')[0]
    job = connection[project_ref].job(key)
    for entry in job.items():
        yield entry
	import collections, json
	from urllib import urlretrieve
	from urlparse import urljoin
	from csv import DictReader, reader as csv_reader

	import scrapinghub

	from project.settings import SH_APIKEY


	# Root URL that `_download` joins relative feed names onto with `urljoin`.
	# The trailing slash matters: without it `urljoin` would replace the last
	# path segment ("feeds") instead of appending the feed name below it.
	BASE_URL = 'http://files.scrapinghub.com.s3.amazonaws.com/feeds/'


	def _download(url, _fname=None):
	    """Fetch a feed file and return the path of the local copy.

	    :param url: feed location; it is joined onto ``BASE_URL`` with
	        ``urljoin`` before being requested.
	    :param _fname: destination path handed to ``urlretrieve``.  With
	        the default ``None``, ``urlretrieve`` picks a temporary file.
	    :returns: the local file name reported by ``urlretrieve``.
	    """
	    # urlretrieve answers with (local_filename, headers); only the
	    # file name is of interest to the callers.
	    _fname, _ = urlretrieve(urljoin(BASE_URL, url), _fname)
	    return _fname


	def _parse(_file, ofmt):
	    """Return an iterator over the records stored in an open feed file.

	    The input format is taken from the text after the last ``.`` in
	    ``_file.name``:

	    * ``csv`` with ``ofmt='dict'``: a ``DictReader`` (one mapping per
	      row, keyed by the header line).
	    * ``csv`` with ``ofmt='list'``: a plain csv reader positioned after
	      the header line, so only data rows are produced.
	    * ``jl`` with ``ofmt='dict'``: one decoded JSON document per
	      non-blank line.

	    :param _file: open file object exposing a ``name`` attribute.
	    :param ofmt: requested row format, ``'dict'`` or ``'list'``.
	    :raises ValueError: for an unsupported extension or an unsupported
	        ``ofmt`` for that extension.  ``ValueError`` derives from
	        ``Exception``, so callers catching ``Exception`` keep working.
	    """
	    fext = _file.name.split('.')[-1]

	    if fext == 'csv':
	        if ofmt == 'dict':
	            return DictReader(_file)
	        if ofmt == 'list':
	            rows = csv_reader(_file)
	            # Drop the header line.  The next() builtin works on both
	            # Python 2.6+ and 3, and the default keeps an empty file
	            # from raising StopIteration into the calling generators.
	            next(rows, None)
	            return rows
	        raise ValueError('Not supported ofmt')

	    if fext == 'jl':
	        if ofmt == 'dict':
	            # Blank lines (e.g. a trailing newline at the end of the
	            # feed) carry no document and would make json.loads fail.
	            return (json.loads(line) for line in _file if line.strip())
	        raise ValueError('Not supported ofmt')

	    raise ValueError('Not supported extension')


	def _to_unicode(data, encoding='utf-8'):
	    """Recursively decode every byte string inside *data* to unicode text.

	    * byte strings are decoded with *encoding*;
	    * text that is already unicode is returned unchanged;
	    * mappings are rebuilt as a plain ``dict`` with converted keys and
	      values;
	    * any other iterable is rebuilt by calling its own type on the
	      converted elements;
	    * everything else (numbers, ``None``, ...) is returned as is.

	    :param data: value to convert.
	    :param encoding: codec used for byte strings (default ``'utf-8'``).
	    :returns: the converted structure.
	    :raises UnicodeDecodeError: if a byte string is not valid *encoding*.
	    """
	    # The ABCs live in collections.abc on Python 3 (the aliases in
	    # ``collections`` were removed in 3.10) and in ``collections`` on
	    # Python 2.  The import is local so the block stays self-contained.
	    try:
	        from collections.abc import Iterable, Mapping
	    except ImportError:  # Python 2
	        from collections import Iterable, Mapping

	    if isinstance(data, bytes):  # ``bytes`` is ``str`` on Python 2
	        return data.decode(encoding)
	    if isinstance(data, type(u'')):
	        # Already text.  Calling .decode() on a Python 2 ``unicode``
	        # first ASCII-encodes it implicitly and crashes on non-ASCII
	        # characters.  Text is also iterable, so it must stop here.
	        return data
	    if isinstance(data, Mapping):
	        # Each (key, value) pair is a tuple and is converted by the
	        # iterable branch below.
	        return dict(_to_unicode(pair, encoding) for pair in data.items())
	    if isinstance(data, Iterable):
	        return type(data)(_to_unicode(elem, encoding) for elem in data)
	    return data


	def ondemand(ofmt='dict', _url='feed_1.csv', _fname=None):
	    """Generate the records of the on-demand feed, decoded to unicode.

	    Being a generator, nothing is downloaded until the first record is
	    requested; the local file stays open until iteration finishes.

	    :param ofmt: row format forwarded to ``_parse``.
	    :param _url: feed location forwarded to ``_download``.
	    :param _fname: optional local destination forwarded to ``_download``.
	    """
	    with open(_download(_url, _fname)) as f:
	        for row in _parse(f, ofmt):
	            yield _to_unicode(row)


	def companies(ofmt='dict', _url='feed_2.csv', _fname=None):
	    """Generate the records of the companies feed, decoded to unicode.

	    Being a generator, nothing is downloaded until the first record is
	    requested; the local file stays open until iteration finishes.

	    :param ofmt: row format forwarded to ``_parse``.
	    :param _url: feed location forwarded to ``_download``.
	    :param _fname: optional local destination forwarded to ``_download``.
	    """
	    with open(_download(_url, _fname)) as f:
	        for row in _parse(f, ofmt):
	            yield _to_unicode(row)


	def items(key):
	    """Generate the items of the job identified by *key*.

	    :param key: job key string.  The text before its first ``/`` is
	        used to index the ``scrapinghub.Connection`` (presumably the
	        project id -- confirm against the scrapinghub client
	        documentation); the whole key is then passed to ``.job()``.
	    """
	    job = scrapinghub.Connection(SH_APIKEY)[key.split('/')[0]].job(key)
	    for item in job.items():
	        yield item