Skip to content

Instantly share code, notes, and snippets.

@ramondeklein
Created March 2, 2021 18:49
Show Gist options
  • Save ramondeklein/fb016893c500685c976dd1e89a0b5873 to your computer and use it in GitHub Desktop.
Save ramondeklein/fb016893c500685c976dd1e89a0b5873 to your computer and use it in GitHub Desktop.
Rename table in AWS glue (including partition scheme)
# This GIST can be used to rename a table in AWS Glue and keep the
# partitioning scheme intact.
import boto3
region = "eu-west-1"
database_name = "rdk-test"
table_name = "yt_uncompressed"
new_table_name = "nyc_taxiset"
client = boto3.client("glue", region_name=region)
response = client.get_table(DatabaseName=database_name, Name=table_name)
table_input = response["Table"]
table_input["Name"] = new_table_name
table_input.pop("CreatedBy")
table_input.pop("CreateTime")
table_input.pop("UpdateTime")
table_input.pop("DatabaseName")
table_input.pop("IsRegisteredWithLakeFormation")
table_input.pop("CatalogId")
client.create_table(DatabaseName=database_name, TableInput=table_input)
response = client.get_table(DatabaseName=database_name, Name=new_table_name)
table_input = response["Table"]
paginator = client.get_paginator('get_partitions')
response = paginator.paginate(
DatabaseName=database_name,
TableName=table_name
)
partitions = list()
for page in response:
for p in page['Partitions']:
copy = p.copy()
copy.pop("DatabaseName")
copy.pop("TableName")
copy.pop("CreationTime")
copy.pop("CatalogId")
partitions.append(copy)
client.batch_create_partition(
CatalogId=table_input["CatalogId"],
DatabaseName=database_name,
TableName=new_table_name,
PartitionInputList=partitions
)
@msropp
Copy link

msropp commented Dec 14, 2022

This is a super helpful script. Thank you! 🥳

One update (probably a change in AWS since you wrote the original): I had to also do a table_input.pop() on "VersionId". 😄

@duke8585
Copy link

duke8585 commented Jun 27, 2023

This is a super helpful script. Thank you! 🥳

One update (probably a change in AWS since you wrote the original): I had to also do a table_input.pop() on "VersionId". 😄

+1

and you might wanna add a batched add partitions à la

    batch_size = 50  # NOTE must be lower than 100

    while len(partitions) > 0:
        partitions_batch = partitions[0:batch_size]
        del partitions[0:batch_size]

        partition_list = sorted([p.get("Values")[0] for p in partitions_batch])
        print(f"+++\n\n adding partitions to {new_table_name}: {partition_list} \n\n+++")

        client.batch_create_partition(
            CatalogId=table_input["CatalogId"],
            DatabaseName=database_name,
            TableName=new_table_name,
            PartitionInputList=partitions_batch,
        )

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment