Skip to content

Instantly share code, notes, and snippets.

@jamiefrench
Last active July 10, 2019 19:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamiefrench/193d6388a2f49a747ffa7580017ed05d to your computer and use it in GitHub Desktop.
Save jamiefrench/193d6388a2f49a747ffa7580017ed05d to your computer and use it in GitHub Desktop.
AWS Athena Classic ELB - Partitioned by Year, Month, Day
CREATE EXTERNAL TABLE IF NOT EXISTS {{DATABASE_NAME.TABLE_NAME}} (
timestamp string,
elb_name string,
request_ip string,
request_port int,
backend_ip string,
backend_port int,
request_processing_time double,
backend_processing_time double,
client_response_time double,
elb_response_code string,
backend_response_code string,
received_bytes bigint,
sent_bytes bigint,
request_verb string,
url string,
protocol string,
user_agent string,
ssl_cipher string,
ssl_protocol string
)
PARTITIONED BY(year string, month string, day string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1',
'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \\\"([^ ]*) ([^ ]*) (- |[^ ]*)\\\" (\"[^\"]*\") ([A-Z0-9-]+) ([A-Za-z0-9.-]*)$' )
LOCATION 's3://{{BUCKET_NAME}}/AWSLogs/{{ACCOUNT_ID}}/elasticloadbalancing/{{REGION}}/';
// To only query for June 2019 load data from Partition
ALTER TABLE {{DATABASE_NAME.TABLE_NAME}} ADD PARTITION (year='2019', month='06', day='*') location 's3://{{BUCKET_NAME}}/AWSLogs/{{ACCOUNT_ID}}/elasticloadbalancing/{{REGION}}/2019/06/';
// Query
SELECT timestamp, user_agent, request_verb,url, ssl_cipher,ssl_protocol
FROM {{DATABASE_NAME.TABLE_NAME}}
WHERE ssl_protocol='TLSv1'
LIMIT 10;
// Drop Partition
ALTER TABLE {{DATABASE_NAME.TABLE_NAME}} DROP PARTITION (year='2019', month='06', day='*');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment