Skip to content

Instantly share code, notes, and snippets.

@Siliconrob
Forked from danclien/add_partition.sql
Created August 11, 2017 19:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Siliconrob/788ba8c115e738ffb2f4e38e374eb974 to your computer and use it in GitHub Desktop.
Save Siliconrob/788ba8c115e738ffb2f4e38e374eb974 to your computer and use it in GitHub Desktop.
Queries I used to import ALB logs into AWS Athena. Based off of https://medium.com/@robwitoff/athena-alb-log-analysis-b874d0958909
ALTER TABLE your_table_name_here
ADD PARTITION (year='2017', month='*', day='*')
LOCATION 's3://your-alb-log-bucket/processed/AWSLogs/00000-change-with-your-account-id/elasticloadbalancing/us-east-1-change-with-your-region/2017/'
CREATE EXTERNAL TABLE IF NOT EXISTS alb.your_table_name_here (
type string,
time string,
elb string,
client_ip string,
client_port int,
target_ip string,
target_port int,
request_processing_time decimal,
target_processing_time decimal,
response_processing_time decimal,
elb_status_code int,
target_status_code string,
received_bytes string,
sent_bytes string,
request_verb string,
request_url string,
request_proto string,
user_agent string,
ssl_cipher string,
ssl_protocol string,
target_group_arn string,
trace_id string
)
PARTITIONED BY(year string, month string, day string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1',
'input.regex' = '^([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*):([0-9]*) ([^ ]*) ([-+]?[0-9]*\.?[0-9]+) ([-+]?[0-9]*\.?[0-9]+) ([0-9]*) ([0-9]*) ([^ ]*) ([^ ]*) \"([^ ]*) ([^ ]*) ([^ ]*)\" \"([^\"]*)\" ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)$'
) LOCATION 's3://your-alb-log-bucket/processed/AWSLogs/00000-change-with-your-account-id/elasticloadbalancing/us-east-1-change-with-your-region/'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment