Skip to content

Instantly share code, notes, and snippets.

@sivel
Created May 18, 2013 00:08
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sivel/5602713 to your computer and use it in GitHub Desktop.
Save sivel/5602713 to your computer and use it in GitHub Desktop.
Sync a remote Rackspace Cloud Files container to a local directory
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2012 Rackspace
# Copyright 2013 Matt Martz
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import calendar
import errno
import os

import dateutil.parser
import pyrax
import pyrax.utils as utils
class CFClientCont2Folder(pyrax.cf_wrapper.client.CFClient):
    """
    CFClient subclass that adds a container-to-folder sync: objects in a
    remote container are downloaded into a local directory.
    """

    def sync_container_to_folder(self, container, folder_path, delete=False,
                                 include_hidden=False, ignore=None,
                                 ignore_timestamps=False):
        """
        Compares the contents of the specified container, and checks to make
        sure that the corresponding object is present in the specified folder.
        If there is no local file matching the remote object, it is created.
        If a matching file exists, the etag is examined to determine if the
        object in the container matches the local file; if they differ, the
        file is updated with the remote object, if the remote object is newer
        when `ignore_timestamps` is False (default). If `ignore_timestamps`
        is True, the file is overwritten with the remote object contents
        whenever the etags differ. Every file that is written gets its access
        and modification times set to the remote object's timestamp.

        NOTE: the timestamp of a remote object is the time it was uploaded,
        not the original modification time of the file stored in that object.

        Unless `include_hidden` is True, the pattern '.*' is added to the
        ignore patterns, so names beginning with a period are skipped.

        If the `delete` option is True, any files in the folder that do
        not have corresponding objects in the container are deleted.

        You can selectively ignore files by passing either a single pattern or
        a list of patterns in `ignore`. When downloading, each pattern is
        matched against the full object name; when deleting, it is matched
        against each individual local folder and file name. Names that match
        any pattern are neither downloaded nor deleted. The patterns should be
        standard *nix-style shell patterns; e.g., '*pyc' will ignore all files
        ending in 'pyc', such as 'program.pyc' and 'abcpyc'.
        """
        cont = self.get_container(container)
        # Names of every remote object considered during this run; the delete
        # pass compares local files against this list.
        self._remote_files = []
        self._sync_container_to_folder(cont, folder_path,
                                       delete=delete,
                                       include_hidden=include_hidden,
                                       ignore=ignore,
                                       ignore_timestamps=ignore_timestamps)

    def _sync_container_to_folder(self, cont, folder_path, delete,
                                  include_hidden, ignore, ignore_timestamps):
        """
        Internal worker for sync_container_to_folder(): walks the full object
        listing of `cont` once, downloads whatever is missing or out of date
        under `folder_path`, then optionally removes local files that have no
        remote counterpart.
        """
        # Work on a copy so the '.*' pattern is never appended to the list
        # the caller passed in, and so a tuple of patterns is accepted too.
        ignore = list(utils.coerce_string_to_list(ignore))
        if not include_hidden:
            ignore.append(".*")

        for obj in cont.get_objects(full_listing=True):
            # Directory marker objects carry no file content.
            if obj.content_type == 'application/directory':
                continue
            if utils.match_pattern(obj.name, ignore):
                continue
            self._remote_files.append(obj.name)

            # NOTE(review): the object name is joined to folder_path
            # unchecked; a name containing '..' resolves outside the folder.
            local_path = os.path.join(folder_path, obj.name)
            if utils.get_checksum(local_path) == obj.etag:
                continue

            # Always computed: it is needed for os.utime() below even when
            # the timestamp comparison itself is skipped.
            remote_stamp = self._remote_timestamp(obj)
            if not ignore_timestamps:
                try:
                    local_stamp = os.stat(local_path).st_mtime
                except OSError:
                    # No readable local file: treat it as infinitely old.
                    local_stamp = -1
                if remote_stamp <= local_stamp:
                    # local file is newer (or the same age); keep it
                    continue

            contents = cont.fetch_object(obj.name)
            self._write_local_file(local_path, contents, remote_stamp)

        if delete:
            self._delete_files_not_in_list(cont, folder_path, ignore)

    @staticmethod
    def _remote_timestamp(obj):
        """
        Returns the object's `last_modified` value as integer seconds since
        the epoch. A value carrying no timezone is interpreted as UTC
        (assumption: the storage service reports UTC -- TODO confirm).
        """
        parsed = dateutil.parser.parse(obj.last_modified)
        # strftime('%s') is a platform extension and would read a naive value
        # as local time; timegm() is portable and timezone-independent.
        return calendar.timegm(parsed.utctimetuple())

    @staticmethod
    def _write_local_file(local_path, contents, mtime):
        """
        Writes `contents` to `local_path`, creating missing parent
        directories, and sets the file's access/modification times to `mtime`.
        """
        parent = os.path.dirname(local_path)
        if parent:
            try:
                os.makedirs(parent, 0o755)
            except OSError as e:
                # An already existing directory is fine; anything else is not.
                if e.errno != errno.EEXIST:
                    raise
        if not isinstance(contents, bytes):
            contents = contents.encode("utf-8")
        # Binary mode so object bytes are stored without newline translation.
        with open(local_path, 'wb') as f:
            f.write(contents)
        os.utime(local_path, (mtime, mtime))

    def _delete_files_not_in_list(self, cont, folder_path, ignore=None):
        """
        Deletes every file under `folder_path` whose path relative to that
        folder is not listed in self._remote_files. Files and directories
        whose own name matches one of the `ignore` patterns are left alone;
        ignored directories are not descended into. `cont` is accepted for
        interface compatibility and is not used.
        """
        remote_names = set(self._remote_files)
        doomed = []
        for dirpath, dirnames, filenames in os.walk(folder_path):
            # Prune in place so os.walk() skips ignored directories entirely.
            dirnames[:] = [name for name in dirnames
                           if not utils.match_pattern(name, ignore)]
            for fname in filenames:
                if utils.match_pattern(fname, ignore):
                    continue
                full_path = os.path.join(dirpath, fname)
                rel_name = os.path.relpath(full_path, folder_path)
                # Object names use '/' regardless of the local separator.
                if rel_name.replace(os.sep, "/") not in remote_names:
                    doomed.append(full_path)
        # Delete only after the walk has finished.
        for full_path in doomed:
            os.unlink(full_path)
def connect_to_cloudfiles(region=None, public=True):
    """
    Returns a CFClientCont2Folder built from the credentials currently held
    in pyrax.identity. With the default public=True the client is given the
    "publicURL" endpoint type; pass public=False to get "internalURL"
    (ServiceNet) instead.
    """
    region = pyrax._safe_region(region)
    storage_url = pyrax._get_service_endpoint("object_store", region,
                                              public=public)
    cdn_url = pyrax._get_service_endpoint("object_cdn", region)

    endpoint_types = {True: "publicURL", False: "internalURL"}
    endpoint_type = endpoint_types[public]

    identity = pyrax.identity
    os_options = dict(
        tenant_id=identity.tenant_name,
        auth_token=identity.token,
        endpoint_type=endpoint_type,
        tenant_name=identity.tenant_name,
        object_storage_url=storage_url,
        object_cdn_url=cdn_url,
        region_name=region,
    )

    credentials = (identity.auth_endpoint, identity.username,
                   identity.password)
    client_kwargs = dict(
        tenant_name=identity.tenant_name,
        preauthurl=storage_url,
        preauthtoken=identity.token,
        auth_version="2",
        os_options=os_options,
        http_log_debug=pyrax._http_debug,
    )
    client = CFClientCont2Folder(*credentials, **client_kwargs)

    # Rewrite the client's user agent through pyrax's own helper.
    client.user_agent = pyrax._make_agent_name(client.user_agent)
    return client
if __name__ == '__main__':
    # Example run: replace the placeholder credentials, container name and
    # local directory below with real values.
    credentials = {
        'username': 'myusername',
        'password': 'myapikey',
        'region': 'DFW',
    }
    pyrax.set_credentials(**credentials)
    cf = connect_to_cloudfiles(region=credentials['region'])
    cf.sync_container_to_folder(
        'some-container',
        '/path/to/some/local/directory/',
    )
# vim:set ts=4 sw=4 expandtab:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment