Created
July 10, 2019 17:37
-
-
Save nicolas-modsy/76ae911e6e0a9bf1ac3678d785e618c7 to your computer and use it in GitHub Desktop.
Generate Athena / Hive query to add partitions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# All partitions in time range are added in one query | |
from datetime import timedelta, date | |
def daterange(start_date, end_date): | |
for n in range(int ((end_date - start_date).days)): | |
yield start_date + timedelta(n) | |
start_date = date(2019, 1, 1) | |
end_date = date(2019, 7, 1) | |
# main | |
partition_statement = ("PARTITION (region='us-west-2', year='2019', month='{month}', day='{day}') " | |
"LOCATION 's3://aws-logs-052457202381-us-west-2/AWSLogs/052457202381/CloudTrail/us-west-2/2019/{month}/{day}'") | |
partitions = [] | |
for single_date in daterange(start_date, end_date): | |
print single_date.strftime("%Y-%m-%d") | |
partition = partition_statement.format(month=single_date.strftime("%m"), day=single_date.strftime("%d")) | |
partitions.append(partition) | |
partitions_str = '\n'.join(partitions) | |
alter_table_statement = "ALTER TABLE cloudtrail_log ADD IF NOT EXISTS {partitions}" | |
alter_table_statement = alter_table_statement.format(partitions=partitions_str) | |
print alter_table_statement |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment