Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@rdkls
Last active July 11, 2023 13:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rdkls/b0cea7202067976525bb962b2788a735 to your computer and use it in GitHub Desktop.
Save rdkls/b0cea7202067976525bb962b2788a735 to your computer and use it in GitHub Desktop.
athena table create statement for one bucket holding multiple load balancer access logs (default example is limited to one table per alb)
Note the projection for api/alb name is an enum not 'injected' to enable querying on it
CREATE EXTERNAL TABLE IF NOT EXISTS alb_logs (
type string,
time string,
elb string,
client_ip string,
client_port int,
target_ip string,
target_port int,
request_processing_time double,
target_processing_time double,
response_processing_time double,
elb_status_code int,
target_status_code string,
received_bytes bigint,
sent_bytes bigint,
request_verb string,
request_url string,
request_proto string,
user_agent string,
ssl_cipher string,
ssl_protocol string,
target_group_arn string,
trace_id string,
domain_name string,
chosen_cert_arn string,
matched_rule_priority string,
request_creation_time string,
actions_executed string,
redirect_url string,
lambda_error_reason string,
target_port_list string,
target_status_code_list string,
classification string,
classification_reason string
)
PARTITIONED BY
(
api_name STRING,
day STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1',
'input.regex' =
'([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\s]+?)\" \"([^\s]+)\" \"([^ ]*)\" \"([^ ]*)\"')
LOCATION 's3://MY_ALB_ACCESS_LOGS/'
TBLPROPERTIES
(
"projection.enabled" = "true",
"projection.api_name.type" = "enum",
"projection.api_name.values" = "a,list,of,toplevel,folders,each,lb,creates",
"projection.day.type" = "date",
"projection.day.range" = "2022/12/05,NOW",
"projection.day.format" = "yyyy/MM/dd",
"projection.day.interval" = "1",
"projection.day.interval.unit" = "DAYS",
"storage.location.template" = "s3://MY_ALB_ACCESS_LOGS/${api_name}/AWSLogs/MY_AWS_ACCOUNT_ID/elasticloadbalancing/ap-southeast-2/${day}"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment