Last active
November 23, 2022 21:14
-
-
Save maiconbaum/195c031d4500769841b470cf2b8d183d to your computer and use it in GitHub Desktop.
This is a SQL Query to create a table in AWS Glue Data Catalog using Amazon Athena Query Editor in order to query Transit Gateway Flow Logs using Apache Parquet format, with Hive Compatible mode enabled and daily partition.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE EXTERNAL TABLE IF NOT EXISTS transit_gateway ( | |
`version` int, | |
`resource_type` string, | |
`account_id` string, | |
`tgw_id` string, | |
`tgw_attachment_id` string, | |
`tgw_src_vpc_account_id` string, | |
`tgw_dst_vpc_account_id` string, | |
`tgw_src_vpc_id` string, | |
`tgw_dst_vpc_id` string, | |
`tgw_src_subnet_id` string, | |
`tgw_dst_subnet_id` string, | |
`tgw_src_eni` string, | |
`tgw_dst_eni` string, | |
`tgw_src_az_id` string, | |
`tgw_dst_az_id` string, | |
`tgw_pair_attachment_id` string, | |
`srcaddr` string, | |
`dstaddr` string, | |
`srcport` int, | |
`dstport` int, | |
`protocol` bigint, | |
`packets` bigint, | |
`bytes` bigint, | |
`start` bigint, | |
`end` bigint, | |
`log_status` string, | |
`type` string, | |
`packets_lost_no_route` bigint, | |
`packets_lost_blackhole` bigint, | |
`packets_lost_mtu_exceeded` bigint, | |
`packets_lost_ttl_expired` bigint, | |
`tcp_flags` int, | |
`region` string, | |
`flow_direction` string, | |
`pkt_src_aws_service` string, | |
`pkt_dst_aws_service` string | |
) | |
PARTITIONED BY ( | |
`aws-account-id` string, | |
`aws-service` string, | |
`aws-region` string, | |
`year` string, | |
`month` string, | |
`day` string | |
) | |
ROW FORMAT SERDE | |
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' | |
STORED AS INPUTFORMAT | |
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' | |
OUTPUTFORMAT | |
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' | |
LOCATION | |
's3://<bucket-name>/AWSLogs/' | |
TBLPROPERTIES ( | |
'EXTERNAL'='true', | |
'skip.header.line.count'='1' | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is the AWS CLI command to create the compatible Flow Logs: