Skip to content

Instantly share code, notes, and snippets.

@nicolas-modsy
Created July 10, 2019 17:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nicolas-modsy/76ae911e6e0a9bf1ac3678d785e618c7 to your computer and use it in GitHub Desktop.
Save nicolas-modsy/76ae911e6e0a9bf1ac3678d785e618c7 to your computer and use it in GitHub Desktop.
Generate Athena / Hive query to add partitions
# All partitions in time range are added in one query
from datetime import timedelta, date
def daterange(start_date, end_date):
for n in range(int ((end_date - start_date).days)):
yield start_date + timedelta(n)
start_date = date(2019, 1, 1)
end_date = date(2019, 7, 1)
# main
partition_statement = ("PARTITION (region='us-west-2', year='2019', month='{month}', day='{day}') "
"LOCATION 's3://aws-logs-052457202381-us-west-2/AWSLogs/052457202381/CloudTrail/us-west-2/2019/{month}/{day}'")
partitions = []
for single_date in daterange(start_date, end_date):
print single_date.strftime("%Y-%m-%d")
partition = partition_statement.format(month=single_date.strftime("%m"), day=single_date.strftime("%d"))
partitions.append(partition)
partitions_str = '\n'.join(partitions)
alter_table_statement = "ALTER TABLE cloudtrail_log ADD IF NOT EXISTS {partitions}"
alter_table_statement = alter_table_statement.format(partitions=partitions_str)
print alter_table_statement
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment