Skip to content

Instantly share code, notes, and snippets.

class S3RecordIterator(object):
"""Iterator over Amazon S3. These results are in the
form of multiple files of data and a single file indicating the batch.
This class offers a single generator that handles loading chunks of files
and moving on to the next files in the batch.
It also checkpoints consumed records internally so that failed jobs can be
resumed safely.
"""
def has_unseen_data(self):
@behrooz-jana
behrooz-jana / s3_record_reader.py
Created October 7, 2015 17:37
S3 line reader
# -*- coding: utf-8; -*-
from boto.s3.connection import S3Connection, OrdinaryCallingFormat
import Queue
from time import sleep
from threading import Thread
class InvalidStateError(StandardError):
"""Raised if, while fetching data from S3, we encounter a state that
shouldn't have happened. Needs direct intervention from engineers.