Skip to content

Instantly share code, notes, and snippets.

@christippett
Created February 11, 2019 04:27
Show Gist options
  • Save christippett/ef8946346275ebe01997a69d2c58025f to your computer and use it in GitHub Desktop.
Save christippett/ef8946346275ebe01997a69d2c58025f to your computer and use it in GitHub Desktop.
Migrate GCS bucket(s) from one project/location to another
import uuid
from google.cloud import storage
from google.cloud.storage import blob, bucket
# Shared Cloud Storage client used by every bucket operation in this script.
# It is constructed without arguments, so project and credentials come from
# whatever defaults the client library resolves.
client = storage.Client()

# Names of the buckets that should be migrated.
BUCKETS = ["bucket-a", "bucket-b"]

# Destination location and project for the re-created buckets.
NEW_BUCKET_LOCATION = "australia-southeast1"
NEW_BUCKET_PROJECT = "example-project"
class GoogleCloudStorageMigrationHelper:
    """Re-create buckets in a new location/project, keeping their objects.

    For every bucket to migrate, the helper:

    1. copies all objects into a staging bucket under a ``<bucket name>/``
       prefix,
    2. deletes the original bucket,
    3. creates a bucket with the same name in the new project/location, and
    4. copies the staged objects back, stripping the prefix again.

    WARNING: step 2 is destructive. The staging bucket holds the only copy of
    the data until step 4 has finished.
    """

    def __init__(
        self, buckets_to_migrate, new_location, new_project, staging_bucket_name=None
    ):
        """Store the migration settings.

        Args:
            buckets_to_migrate: Iterable of bucket names to migrate.
            new_location: Location the re-created buckets are created in.
            new_project: Project the re-created buckets are created in.
            staging_bucket_name: Name of the intermediate bucket. When omitted
                (or empty) a name of the form ``gcs-migration-stage-<8 chars>``
                is generated from a random UUID.
        """
        self.buckets_to_migrate = buckets_to_migrate
        self.new_location = new_location
        self.new_project = new_project
        if not staging_bucket_name:
            staging_bucket_name = "gcs-migration-stage-" + str(uuid.uuid4())[:8]
        self.staging_bucket_name = staging_bucket_name

    def migrate(self):
        """Migrate every configured bucket through the staging bucket.

        Buckets whose location already equals ``new_location`` (compared
        case-insensitively) are skipped.
        """
        staging_bucket = self.get_or_create_bucket(self.staging_bucket_name)
        for source_bucket_name in self.buckets_to_migrate:
            source_bucket = self.get_or_create_bucket(source_bucket_name)

            # Skip migration if bucket already in new location
            if (
                source_bucket.location
                and source_bucket.location.lower() == self.new_location.lower()
            ):
                continue

            # TODO: check if bucket already in new project
            # TODO: migrate bucket labels
            self.copy_objects_to_staging_bucket(
                source_bucket=source_bucket, target_bucket=staging_bucket
            )
            # Only reached when every copy above completed without raising.
            self.delete_bucket(source_bucket)
            new_bucket = self.get_or_create_bucket(
                source_bucket.name, project=self.new_project, location=self.new_location
            )
            self.copy_objects_from_staging_bucket(
                source_bucket=staging_bucket, target_bucket=new_bucket
            )

        # Clean up
        # NOTE: staging bucket cleanup is disabled, so the staging bucket and
        # the staged copies are left in place after the run.
        # self.delete_bucket(staging_bucket)

    def get_or_create_bucket(self, bucket_id, **create_kwargs):
        """Return the bucket named ``bucket_id``, creating it if it is missing.

        Args:
            bucket_id: Name of the bucket.
            **create_kwargs: Forwarded to ``Bucket.create`` when the bucket has
                to be created (``migrate`` passes ``project``/``location``).

        Returns:
            The bucket object, reloaded so its metadata reflects the server.
        """
        bucket_ = bucket.Bucket(client, bucket_id)
        if not bucket_.exists():
            print(f"Creating bucket: {bucket_.name}")
            bucket_.create(**create_kwargs)
        bucket_.reload()
        return bucket_

    def copy_objects_to_staging_bucket(self, source_bucket, target_bucket):
        """Copy every object of ``source_bucket`` into the staging bucket.

        Each object is stored as ``<source bucket name>/<object name>`` so the
        objects of several buckets can share one staging bucket.
        """
        for blob_obj in source_bucket.list_blobs():
            object_name = source_bucket.name + "/" + blob_obj.name
            self._rewrite_blob(blob_obj, object_name, target_bucket)

    def copy_objects_from_staging_bucket(self, source_bucket, target_bucket):
        """Copy the staged objects of ``target_bucket`` back out of staging.

        Only objects below the ``<target bucket name>/`` prefix are copied and
        the prefix is stripped from their names.
        """
        # The trailing "/" matters: without it the objects staged for any
        # bucket whose name merely starts with this bucket's name (e.g.
        # "bucket-a-backup" vs "bucket-a") would be copied here as well.
        staging_prefix = target_bucket.name + "/"
        for blob_obj in source_bucket.list_blobs(prefix=staging_prefix):
            _, _, object_name = blob_obj.name.partition("/")
            if not object_name:
                # A bare "<bucket name>/" placeholder has no object name left.
                continue
            self._rewrite_blob(blob_obj, object_name, target_bucket)

    def _rewrite_blob(self, blob_obj, target_name, target_bucket):
        """Copy ``blob_obj`` to ``target_name`` in ``target_bucket``.

        Objects that already exist at the destination are left untouched.
        """
        new_blob_obj = blob.Blob(target_name, target_bucket)
        if not new_blob_obj.exists():
            print(f"Copying object: {target_name}")
            self._rewrite_until_complete(new_blob_obj, blob_obj)

    @staticmethod
    def _rewrite_until_complete(target_blob, source_blob):
        """Call ``rewrite`` repeatedly until no continuation token is returned.

        ``rewrite`` returns ``(token, bytes_rewritten, total_bytes)``. A
        non-``None`` token means the copy is not finished yet and the call must
        be repeated with that token; a single call is not guaranteed to copy
        the whole object.
        """
        token, _, _ = target_blob.rewrite(source_blob)
        while token is not None:
            token, _, _ = target_blob.rewrite(source_blob, token=token)

    def delete_bucket(self, bucket):
        """Delete ``bucket`` together with its objects, if the bucket exists.

        Args:
            bucket: Bucket object to delete. (The parameter name shadows the
                imported ``bucket`` module inside this method only.)
        """
        if bucket.exists():
            print(f"Deleting bucket: {bucket.name}")
            # ``delete(force=True)`` refuses to empty buckets holding more than
            # 256 objects, so remove the objects explicitly first.
            for blob_obj in bucket.list_blobs():
                blob_obj.delete()
            bucket.delete(force=True)
if __name__ == "__main__":
    # Script entry point: build the helper from the module-level
    # configuration and run the migration.
    settings = {
        "buckets_to_migrate": BUCKETS,
        "new_location": NEW_BUCKET_LOCATION,
        "new_project": NEW_BUCKET_PROJECT,
    }
    migration_helper = GoogleCloudStorageMigrationHelper(**settings)
    migration_helper.migrate()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment