Created
July 25, 2016 15:33
-
-
Save swmcc/ef2b4dfc50504acae73b5bc53b38b526 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import glob | |
import struct | |
from boto.s3.connection import S3Connection | |
from multiprocessing.pool import ThreadPool | |
def files():
    """Return the gzip inventory files found under ``tmp/inventory``.

    The pattern is relative to the current working directory.

    Returns:
        A list of matching paths in sorted order. ``glob.glob`` makes no
        ordering promise of its own, so sorting keeps downstream batching
        reproducible from run to run. The list is empty when nothing
        matches or the directory does not exist.
    """
    return sorted(glob.glob("tmp/inventory/*.gz"))
def is_valid(file):
    """Report whether a gzip file claims a non-zero uncompressed size.

    Only the last four bytes of the file are inspected. In the gzip format
    (RFC 1952) these hold ISIZE: the uncompressed length modulo 2**32,
    stored little-endian. This is a cheap "not empty" probe, not an
    integrity check: the compressed data is never read or verified.

    Args:
        file: Path of the file to inspect.

    Returns:
        True when the trailing size field is greater than zero. False when
        it is zero or when the file is too short to contain the field.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    with open(file, 'rb') as f:
        # Whence 2 == seek relative to end of file; tell() then yields the size.
        f.seek(0, 2)
        if f.tell() < 4:
            # Too short to hold the trailer; seeking to -4 would raise.
            return False
        f.seek(-4, 2)
        # '<I' pins little-endian, as the gzip format specifies, instead of
        # relying on the byte order of the host machine.
        return struct.unpack('<I', f.read(4))[0] > 0
def group_files(sequence, chunk_size):
    """Split ``sequence`` into consecutive tuples of up to ``chunk_size`` items.

    Every item is kept: when the number of items is not a multiple of
    ``chunk_size`` the final tuple is simply shorter. (Zipping one iterator
    against itself would silently discard that trailing partial group.)

    Args:
        sequence: Any iterable of items to group.
        chunk_size: Maximum number of items per group; must be at least 1.

    Returns:
        A list of tuples in the original item order. Empty input yields an
        empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    # Materialise once so plain iterators/generators can be sliced too.
    items = list(sequence)
    return [
        tuple(items[start:start + chunk_size])
        for start in range(0, len(items), chunk_size)
    ]
if __name__ == "__main__":
    # Script entry point: take the inventory archives in batches of ten and
    # print a line for each one that passes the validity check. Nothing
    # beyond printing happens in this block.
    batches = group_files(files(), 10)
    for batch in batches:
        for path in batch:
            if not is_valid(path):
                continue
            print("Sending %s to s3" % (path))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment