Skip to content

Instantly share code, notes, and snippets.

@jasonjho
Last active December 16, 2015 04:23
Show Gist options
  • Save jasonjho/b6e98adf868cca1289da to your computer and use it in GitHub Desktop.
Save jasonjho/b6e98adf868cca1289da to your computer and use it in GitHub Desktop.
Compose multiple file transform functions
from tempfile import NamedTemporaryFile
def lookahead(iterable):
    """Yield ``(item, has_more)`` pairs for every item of *iterable*.

    ``has_more`` is ``True`` while at least one further item follows and
    ``False`` for the final item, so callers can special-case the last
    element without knowing the length up front.

    An empty *iterable* yields nothing.
    """
    it = iter(iterable)
    try:
        pending = next(it)
    except StopIteration:
        # Empty input: finish cleanly. Letting StopIteration escape a
        # generator body is turned into RuntimeError by PEP 479.
        return
    for upcoming in it:
        # Another item exists, so the held-back one is not the last.
        yield pending, True
        pending = upcoming
    # Nothing followed the held-back item: it is the last one.
    yield pending, False
def compose(file_tranforms):
    """Chain file-to-file transform functions into a single transform.

    Args:
        file_tranforms: Iterable of callables ``func(f_src, f_dest)``. Each
            one reads from ``f_src`` and writes its result to ``f_dest``.
            The iterable is copied once here, so the returned function can
            be called repeatedly even if a one-shot iterator was passed.

    Returns:
        A function ``composition(f_in, f_out)`` that feeds ``f_in`` to the
        first transform, pipes each intermediate result through a temporary
        file into the next transform, and has the last transform write to
        ``f_out``. With no transforms it does nothing.
    """
    transforms = list(file_tranforms)
    last_index = len(transforms) - 1

    def composition(f_in, f_out):
        source = f_in
        intermediates = []
        try:
            for index, func in enumerate(transforms):
                is_last = index == last_index
                if is_last:
                    dest = f_out
                else:
                    dest = NamedTemporaryFile("w+b")
                    intermediates.append(dest)
                func(source, dest)
                if not is_last:
                    # Rewind so the next transform reads from the start.
                    dest.seek(0)
                source = dest
        finally:
            # Intermediate files are private to this call; close them so
            # they are removed even when a transform raises.
            for tmp in intermediates:
                tmp.close()

    return composition
# Let's define some transform functions
# -------------------------------------
# A load step: ignores its input file and writes the S3 object to the destination unchanged
def s3_load(s3_hook, s3_key):
    """Build a transform step that downloads one S3 object into a file.

    Args:
        s3_hook: Object exposing ``get_key(name)``. The key object it
            returns must provide ``get_contents_to_file(fileobj)``.
        s3_key: Key name handed to ``s3_hook.get_key``.

    Returns:
        A callable ``load_to_file(f_src, f_dest)`` with the transform
        signature. ``f_src`` is ignored; the object's contents are written
        to ``f_dest``, which is then flushed and rewound to offset 0.
    """
    def load_to_file(f_src, f_dest):
        """Write the configured S3 object to ``f_dest`` and rewind it.

        Raises:
            ValueError: If the hook returns ``None`` for the key.
        """
        s3_key_object = s3_hook.get_key(s3_key)
        if s3_key_object is None:
            # Fail with the key name rather than an AttributeError on None.
            raise ValueError("S3 key not found: {!r}".format(s3_key))
        s3_key_object.get_contents_to_file(f_dest)
        f_dest.flush()
        f_dest.seek(0)

    return load_to_file
# compression transform
def compress_gz(f_src, f_dest):
    """Stub for the compression transform; performs no work yet.

    Takes the standard ``(f_src, f_dest)`` transform arguments and leaves
    both files untouched.
    """
    return None
# some other transform
def some_other_transform(f_src, f_dest):
    """Stub for a further transform step; performs no work yet.

    Takes the standard ``(f_src, f_dest)`` transform arguments and leaves
    both files untouched.
    """
    return None
# Run the composition
# -------------------
def run_example(s3_hook, s3_key):
    """Run the demo pipeline: S3 load, then the two placeholder transforms.

    ``s3_load`` is a factory, so it must be called with the hook and key to
    obtain the actual ``(f_src, f_dest)`` transform; passing ``s3_load``
    itself into ``compose`` would only build a loader and discard it.

    Args:
        s3_hook: Hook object forwarded to ``s3_load``.
        s3_key: Key name forwarded to ``s3_load``.
    """
    with NamedTemporaryFile("w+b") as f_src, NamedTemporaryFile("w+b") as f_dest:
        fc = compose([s3_load(s3_hook, s3_key), compress_gz, some_other_transform])
        # f_dest is deleted when the ``with`` block exits, so any use of the
        # result has to happen inside this block.
        fc(f_src, f_dest)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment