Skip to content

Instantly share code, notes, and snippets.

@andymotta
Created August 9, 2017 03:21
Show Gist options
  • Save andymotta/12e2a059d9b86b4a9a4b0a7df33eeb98 to your computer and use it in GitHub Desktop.
Save andymotta/12e2a059d9b86b4a9a4b0a7df33eeb98 to your computer and use it in GitHub Desktop.
Watch a directory for changes with Python Watchdog then multipart upload to S3
import sys
import os
import time
from watchdog.observers import Observer
from watchdog.events import FileModifiedEvent, FileCreatedEvent
import boto3
import mimetypes
from botocore.exceptions import ClientError
# Create an S3 client
# Shared module-level client; go2s3.dispatch uses it for every upload.
s3 = boto3.client('s3')
# Every created/modified file is uploaded to each bucket listed here.
# NOTE(review): 'bucket1'/'bucket2' look like placeholders -- replace with real bucket names.
bucket_names = ['bucket1', 'bucket2']
class go2s3:
    """Watchdog event handler that copies created/modified files to S3.

    An instance is handed to ``Observer.schedule``; the observer then calls
    ``dispatch`` with each filesystem event it reports.
    """

    def dispatch(self, event):
        """Upload the file behind *event* to every bucket in ``bucket_names``.

        Only ``FileModifiedEvent`` and ``FileCreatedEvent`` are acted on; any
        other event is ignored. The object key is the file's base name, the
        content type is guessed from that name (falling back to
        ``binary/octet-stream``), and AES256 server-side encryption is
        requested. A failed upload to one bucket is reported on stdout and
        does not prevent the uploads to the remaining buckets.

        Args:
            event: The watchdog filesystem event; ``event.src_path`` is the
                path of the file to upload.
        """
        # Imported locally so the block does not depend on a new top-level import.
        from boto3.exceptions import S3UploadFailedError

        # Guard clause: skip directory events, moves, deletes, etc.
        if not isinstance(event, (FileModifiedEvent, FileCreatedEvent)):
            return

        filename = os.path.basename(event.src_path)
        ct = mimetypes.guess_type(filename)[0] or 'binary/octet-stream'
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Event: %s" % (event,))

        # Uploads the given file using a managed uploader, which will split up large
        # files automatically and upload parts in parallel.
        extra_args = {'ContentType': ct, 'ServerSideEncryption': 'AES256'}
        for bucket in bucket_names:
            try:
                s3.upload_file(Bucket=bucket, Filename=event.src_path, Key=filename, ExtraArgs=extra_args)
            except (ClientError, S3UploadFailedError, OSError) as e:
                # upload_file reports failed transfers as S3UploadFailedError;
                # OSError covers a file that vanished before it could be read.
                # Catching both keeps an error from escaping the handler.
                print("Upload of %s to %s failed with: %s" % (filename, bucket, e))
            else:
                print("Uploaded %s to %s successfully" % (filename, bucket))
if __name__ == "__main__":
    # Directory to watch: the first command-line argument, else the current directory.
    cli_args = sys.argv[1:]
    path = cli_args[0] if cli_args else '.'
    # Create the watch directory when it does not exist yet.
    if not os.path.exists(path):
        os.makedirs(path)

    # Register the S3 upload handler for that directory only (no subdirectories).
    event_handler = go2s3()
    observer = Observer()
    observer.schedule(event_handler, path, recursive=False)
    observer.start()

    # Idle in the main thread until interrupted, then ask the observer to stop.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    # Block until the observer has finished.
    observer.join()
# systemd unit that runs the go2s3.py directory watcher as a service.
[Unit]
Description=Watch /_dir_to_watch/ for new/modified files then upload to s3
[Service]
Type=simple
# NOTE(review): the service runs as root -- confirm whether a less privileged user would suffice.
User=root
Group=root
Restart=on-failure
# /_dir_to_watch/ is passed as the script's first argument, i.e. the directory it watches.
ExecStart=/usr/bin/python /root/bin/go2s3.py /_dir_to_watch/
[Install]
WantedBy=multi-user.target
@Gorparth
Copy link

I've done a similar thing with Google Drive. The problem I'm facing is that whenever the modified event occurs, it reports the name as some output-stream name like '.goutputstream-C83610', which is different for every file. What I want is the real filename for uploading to the cloud. Could you please share a solution? Thanks.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment