Skip to content

Instantly share code, notes, and snippets.

@huevos-y-bacon
Last active March 7, 2024 16:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save huevos-y-bacon/3c5f0ace9e8b2357a7e53c4a01c49342 to your computer and use it in GitHub Desktop.
Save huevos-y-bacon/3c5f0ace9e8b2357a7e53c4a01c49342 to your computer and use it in GitHub Desktop.

Create Athena DB and table and run queries on AWS Classic Load Balancer logs.

SELECT * FROM "clb_logs"."clb_logs" limit 10;
SELECT
timestamp,
elb_name,
url,
protocol,
user_agent,
ssl_cipher,
ssl_protocol
FROM "clb_logs"."clb_logs"
WHERE elb_name like '<CLB_NAME_PREFIX>%'
AND timestamp like '2024-%'
LIMIT 1000000
;
SELECT
distinct(protocol),
count(protocol) as count
FROM "clb_logs"."clb_logs"
WHERE timestamp LIKE '2024-0%'
GROUP BY protocol
SELECT
distinct(ssl_cipher),
count(protocol) as count
FROM "clb_logs"."clb_logs"
WHERE timestamp LIKE '2024-0%'
GROUP BY ssl_cipher
SELECT
distinct(ssl_protocol),
count(protocol) as count
FROM "clb_logs"."clb_logs"
WHERE timestamp LIKE '2024-0%'
GROUP BY ssl_protocol
CREATE EXTERNAL TABLE IF NOT EXISTS `clb_logs`.`clb_logs` (
`timestamp` string,
`elb_name` string,
`request_ip` string,
`request_port` int,
`backend_ip` string,
`backend_port` int,
`request_processing_time` double,
`backend_processing_time` double,
`client_response_time` double,
`elb_response_code` string,
`backend_response_code` string,
`received_bytes` bigint,
`sent_bytes` bigint,
`request_verb` string,
`url` string,
`protocol` string,
`user_agent` string,
`ssl_cipher` string,
`ssl_protocol` string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \\\"([^ ]*) ([^ ]*) (- |[^ ]*)\\\" (\"[^\"]*\") ([A-Z0-9-]+) ([A-Za-z0-9.-]*)$',
'serialization.format' = '1'
)
STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://<ELB_LOG_BUCKET_NAME>/';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment