Created
May 18, 2013 00:08
-
-
Save sivel/5602713 to your computer and use it in GitHub Desktop.
Sync a remote Rackspace Cloud Files container to a local directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Copyright 2012 Rackspace | |
# Copyright 2013 Matt Martz | |
# All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); you may | |
# not use this file except in compliance with the License. You may obtain | |
# a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | |
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | |
# License for the specific language governing permissions and limitations | |
# under the License. | |
import calendar
import errno
import os

import dateutil.parser
import pyrax
import pyrax.utils as utils
class CFClientCont2Folder(pyrax.cf_wrapper.client.CFClient):
    """
    Cloud Files client that adds the ability to sync the contents of a
    remote container down into a local folder.
    """

    def sync_container_to_folder(self, container, folder_path, delete=False,
                                 include_hidden=False, ignore=None,
                                 ignore_timestamps=False):
        """
        Download the objects of `container` into `folder_path`.

        Every object in the container is compared with the file of the same
        relative name under `folder_path`. If there is no local file matching
        the remote object, it is created. If a matching file exists, its
        checksum is compared with the object's etag; when they differ the
        file is overwritten with the remote contents, but only if the remote
        object is newer than the local file, unless `ignore_timestamps` is
        True, in which case the file is overwritten whenever the etags
        differ. NOTE: the timestamp of a remote object is the time it was
        uploaded, not the original modification time of the file stored in
        that object. Each downloaded file gets its access and modification
        times set to the remote object's timestamp.

        Unless `include_hidden` is True, files and folders whose names begin
        with a period are ignored.

        If `delete` is True, any files in the folder that do not have
        corresponding objects in the container are deleted (ignored files
        and the contents of ignored folders are left alone).

        You can selectively ignore files by passing either a single pattern
        or a list of patterns as `ignore`; these are applied to the full
        object name and to the individual folder and file names, and any
        names that match any of the patterns will not be downloaded. The
        patterns should be standard *nix-style shell patterns; e.g., '*pyc'
        will ignore all files ending in 'pyc', such as 'program.pyc' and
        'abcpyc'.
        """
        cont = self.get_container(container)
        # Names of every remote object considered during this run; consulted
        # afterwards by _delete_files_not_in_list().
        self._remote_files = []
        self._sync_container_to_folder(cont, folder_path,
                                       delete=delete,
                                       include_hidden=include_hidden,
                                       ignore=ignore,
                                       ignore_timestamps=ignore_timestamps)

    def _sync_container_to_folder(self, cont, folder_path, delete,
                                  include_hidden, ignore, ignore_timestamps):
        """
        Internal worker for sync_container_to_folder(): walks the full
        object listing of `cont` once and downloads whatever is missing or
        out of date locally, then optionally prunes local-only files.
        """
        # Work on a private copy: the coerced value may be the caller's own
        # list (which must not grow a ".*" entry behind their back) or a
        # tuple (which has no append()).
        ignore = list(utils.coerce_string_to_list(ignore))
        if not include_hidden:
            ignore.append(".*")

        for obj in cont.get_objects(full_listing=True):
            # Pseudo-directory marker objects carry no file data.
            if obj.content_type == 'application/directory':
                continue
            if self._matches_ignore(obj.name, ignore):
                continue
            self._remote_files.append(obj.name)

            pth = os.path.join(folder_path, obj.name)
            if utils.get_checksum(pth) == obj.etag:
                # Local copy already has the same checksum as the object.
                continue

            # Always computed, because the downloaded file is stamped with
            # it even when timestamps are not used for the comparison.
            obj_time_stamp = self._remote_mtime(obj)
            if not ignore_timestamps:
                try:
                    local_mod_stamp = os.stat(pth).st_mtime
                except OSError:
                    # No local file yet: treat it as infinitely old.
                    local_mod_stamp = -1
                if obj_time_stamp <= local_mod_stamp:
                    # local file is newer
                    continue

            self._fetch_to_path(cont, obj.name, pth, obj_time_stamp)

        if delete:
            self._delete_files_not_in_list(cont, folder_path, ignore)

    @staticmethod
    def _matches_ignore(obj_name, ignore):
        """
        Return True if the full object name, or any single '/'-separated
        component of it, matches one of the `ignore` patterns.
        """
        if utils.match_pattern(obj_name, ignore):
            return True
        # Component-wise check so that e.g. ".*" also catches "dir/.hidden".
        return any(utils.match_pattern(part, ignore)
                   for part in obj_name.split('/') if part)

    @staticmethod
    def _remote_mtime(obj):
        """
        Return the object's last-modified time as integer seconds since the
        epoch.
        """
        parsed = dateutil.parser.parse(obj.last_modified)
        # NOTE(review): a value without timezone information is interpreted
        # as UTC here -- confirm against the Cloud Files listing format.
        # timegm() is used instead of strftime('%s'), which is a
        # platform-specific extension and assumes local time.
        return calendar.timegm(parsed.utctimetuple())

    @staticmethod
    def _fetch_to_path(cont, obj_name, local_path, time_stamp):
        """
        Download `obj_name` from `cont` into `local_path`, creating parent
        folders as needed, and set the file's times to `time_stamp`.
        """
        contents = cont.fetch_object(obj_name)
        parent = os.path.dirname(local_path)
        if parent:
            try:
                os.makedirs(parent, 0o755)
            except OSError as e:
                # An already existing folder is fine; anything else is not.
                if e.errno != errno.EEXIST:
                    raise
        # Binary mode: object contents must be written out unmodified.
        with open(local_path, 'wb') as f:
            f.write(contents)
        os.utime(local_path, (time_stamp, time_stamp))

    def _delete_files_not_in_list(self, cont, folder_path, ignore=None):
        """
        Deletes every file under `folder_path` whose relative name is not in
        the self._remote_files list. Files matching `ignore`, and everything
        below folders matching `ignore`, are left untouched. `cont` is
        accepted for interface compatibility and is not used.
        """
        remote_names = set(self._remote_files)
        local_names = []
        for dirpath, dirnames, filenames in os.walk(folder_path):
            # Prune in place so os.walk() does not descend into ignored
            # folders (e.g. hidden ones).
            dirnames[:] = [nm for nm in dirnames
                           if not utils.match_pattern(nm, ignore)]
            for fname in filenames:
                if utils.match_pattern(fname, ignore):
                    continue
                full_path = os.path.join(dirpath, fname)
                local_names.append(os.path.relpath(full_path, folder_path))

        # Delete only after the walk has finished.
        for rel_name in local_names:
            # Object names use '/' regardless of the local path separator.
            if rel_name.replace(os.sep, '/') not in remote_names:
                os.unlink(os.path.join(folder_path, rel_name))
def connect_to_cloudfiles(region=None, public=True):
    """
    Build and return a CFClientCont2Folder client for Cloud Files.

    By default the client is pointed at the public URL; pass False as
    'public' to work over the ServiceNet (internal) connection instead.
    """
    region = pyrax._safe_region(region)
    storage_url = pyrax._get_service_endpoint("object_store", region,
                                              public=public)
    cdn_endpoint = pyrax._get_service_endpoint("object_cdn", region)

    # Strict lookup: anything other than True/False raises KeyError.
    endpoint_types = {True: "publicURL", False: "internalURL"}
    endpoint_type = endpoint_types[public]

    identity = pyrax.identity
    os_options = {
        "tenant_id": identity.tenant_name,
        "auth_token": identity.token,
        "endpoint_type": endpoint_type,
        "tenant_name": identity.tenant_name,
        "object_storage_url": storage_url,
        "object_cdn_url": cdn_endpoint,
        "region_name": region,
    }

    client = CFClientCont2Folder(
        identity.auth_endpoint,
        identity.username,
        identity.password,
        tenant_name=identity.tenant_name,
        preauthurl=storage_url,
        preauthtoken=identity.token,
        auth_version="2",
        os_options=os_options,
        http_log_debug=pyrax._http_debug,
    )
    client.user_agent = pyrax._make_agent_name(client.user_agent)
    return client
if __name__ == '__main__':
    # Example usage: the credentials, container name and local path below
    # are placeholders to be replaced before running.
    pyrax.set_credentials(
        username='myusername',
        password='myapikey',
        region='DFW',
    )
    cf = connect_to_cloudfiles(region='DFW')
    cf.sync_container_to_folder(
        'some-container',
        '/path/to/some/local/directory/',
    )
# vim:set ts=4 sw=4 expandtab: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment