Skip to content

Instantly share code, notes, and snippets.

@swmcc
Created July 25, 2016 15:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save swmcc/ef2b4dfc50504acae73b5bc53b38b526 to your computer and use it in GitHub Desktop.
Save swmcc/ef2b4dfc50504acae73b5bc53b38b526 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import glob
import struct
from boto.s3.connection import S3Connection
from multiprocessing.pool import ThreadPool
def files():
    """Return the paths matching ``tmp/inventory/*.gz``.

    The pattern is relative, so it is resolved against the current
    working directory. The order is whatever ``glob.glob`` yields.
    """
    pattern = "tmp/inventory/*.gz"
    matches = glob.glob(pattern)
    return matches
def is_valid(file):
    """Return True when the last four bytes of *file* hold a non-zero integer.

    For a gzip archive the final four bytes are the ISIZE trailer field
    (uncompressed length modulo 2**32, stored little-endian per RFC 1952),
    so a zero value indicates an archive with no payload.

    Args:
        file: Path of the file to inspect.

    Returns:
        True if the trailing 32-bit unsigned value is greater than zero.
        False if it is zero, or if the file is shorter than four bytes
        (including an empty file).

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    with open(file, 'rb') as f:
        # whence=2 is "relative to end of file"; go there to learn the size.
        f.seek(0, 2)
        if f.tell() < 4:
            # Too short to contain the trailer; seeking to -4 would raise.
            return False
        f.seek(-4, 2)
        # '<I' pins the byte order to little-endian instead of the host's.
        return struct.unpack('<I', f.read(4))[0] > 0
def group_files(sequence, chunk_size):
    """Split *sequence* into consecutive tuples of at most *chunk_size* items.

    Every item is kept: when the number of items is not a multiple of
    *chunk_size*, the final tuple is shorter than the others. (Building
    the groups with ``zip`` over a repeated iterator would stop at the
    last full group and silently discard the remainder.)

    Args:
        sequence: Any iterable of items to group.
        chunk_size: Maximum number of items per group; must be >= 1.

    Returns:
        A list of tuples, in the original order. Empty input gives ``[]``.

    Raises:
        ValueError: If *chunk_size* is less than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1, got %r" % (chunk_size,))
    # Materialise once so that one-shot iterators can be sliced.
    items = list(sequence)
    return [
        tuple(items[start:start + chunk_size])
        for start in range(0, len(items), chunk_size)
    ]
if __name__ == "__main__":
    # Walk the inventory files in batches of ten and announce each one
    # that passes the validity check.
    # NOTE: this block only prints the message; it makes no upload call.
    for batch in group_files(files(), 10):
        for path in batch:
            if not is_valid(path):
                continue
            print("Sending %s to s3" % (path))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment