Skip to content

Instantly share code, notes, and snippets.

@mgla
Last active February 21, 2022 12:08
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mgla/eb0873cca1af0979687b9be403411b97 to your computer and use it in GitHub Desktop.
Save mgla/eb0873cca1af0979687b9be403411b97 to your computer and use it in GitHub Desktop.

Random useful Athena queries

CloudTrail

Find out who modified a IAM user policy

SELECT eventname,
         useridentity.arn,
         sourceIPAddress,
         eventtime,
         requestparameters
FROM db.name
WHERE eventsource = 'iam.amazonaws.com'
        AND eventtime > '2018-01-01T00:00:00Z'
        AND eventtime < '2018-05-09T00:00:00Z'
        AND (eventname = 'AttachUserPolicy'
        OR eventname = 'PutUserPolicy')
ORDER BY  eventtime

Find out who modified a IAM role policy

SELECT eventname,
     useridentity.arn,
     sourceIPAddress,
     eventtime,
     requestparameters
FROM db.name
WHERE eventsource = 'iam.amazonaws.com'
        AND eventtime > '2018-01-01T00:00:00Z'
        AND (eventname = 'AttachRolePolicy'
        OR eventname = 'PutRolePolicy')
ORDER BY  eventtime

Blame user for resource:

SELECT eventname,
         useridentity.arn,
         sourceIPAddress,
         eventtime,
         requestparameters
FROM db.name
WHERE (requestparameters LIKE '%i-12345678%'
        OR requestparameters LIKE '%sg-12345678%')
        AND eventtime > '2018-03-01T00:00:00Z'
        AND eventtime < '2018-03-20T00:00:00Z'
ORDER BY  eventtime asc

Find out who started instances in time frame in AWS region

SELECT eventname,
         useridentity.arn,
         sourceIPAddress,
         eventtime,
         requestparameters
FROM db.name
WHERE eventname = 'RunInstances'
        AND awsregion = 'eu-central-1'
        AND eventtime > '2018-05-01T00:00:00Z'
        AND eventtime < '2018-06-07T00:00:00Z'
ORDER BY  eventtime asc

ELB

Create ELB table

CREATE EXTERNAL TABLE IF NOT EXISTS elb.elb_logs (
         request_timestamp string,
         elb_name string,
         request_ip string,
         request_port int,
         backend_ip string,
         backend_port int,
         request_processing_time double,
         backend_processing_time double,
         client_response_time double,
         elb_response_code string,
         backend_response_code string,
         received_bytes bigint,
         sent_bytes bigint,
         request_verb string,
         url string,
         protocol string,
         user_agent string,
         ssl_cipher string,
         ssl_protocol string 
) 
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
         'serialization.format' = '1', 'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:\-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \\\"([^ ]*) ([^ ]*) (- |[^ ]*)\\\" (\"[^\"]*\") ([A-Z0-9-]+) ([A-Za-z0-9.-]*)$' ) LOCATION 's3://bucket/AWSLogs/account_id/elasticloadbalancing/';

Static resources in period of time

SELECT t.url,
         count(t.url) AS count,
         t.method
FROM 
    (SELECT url,
         request_verb AS method
    FROM "db"."elb_logs"
    WHERE url LIKE '%.png'
     or url LIKE '%.jpg'
     or url LIKE '%.jpeg'
     or url LIKE '%.css'
     or url LIKE '%.js'
            AND request_timestamp > '2018-07-17T00:00:00.00Z'
            AND request_timestamp < '2018-07-24T00:00:00.00Z') t
GROUP BY  t.url, t.method
ORDER BY  count(t.url) desc

Order ELB logs by hostname

SELECT t.hostname, count(t.hostname) as count
FROM 
    (SELECT regexp_extract(url,
         '(//)([^:]+):', 2) AS hostname, url, elb_response_code, user_agent
    FROM db.elb_logs
    WHERE timestamp > '2018-05-19T08:45:00.00Z') t
GROUP BY  t.hostname
ORDER BY  count(t.hostname) DESC

ALB

Create ALB table

CREATE EXTERNAL TABLE IF NOT EXISTS alb.acc (
         type string,
         time string,
         elb string,
         client_ip string,
         client_port int,
         target_ip string,
         target_port int,
         request_processing_time double,
         target_processing_time double,
         response_processing_time double,
         elb_status_code string,
         target_status_code string,
         received_bytes bigint,
         sent_bytes bigint,
         request_verb string,
         request_url string,
         request_proto string,
         user_agent string,
         ssl_cipher string,
         ssl_protocol string,
         target_group_arn string,
         trace_id string,
         domain_name string,
         chosen_cert_arn string,
         matched_rule_priority string,
         request_creation_time string,
         actions_executed string,
         redirect_url string,
         new_field string 
) 
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
         'serialization.format' = '1', 'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\"($| \"[^ ]*\")(.*)') LOCATION 's3://bucket/AWSLogs/account_id/elasticloadbalancing/eu-central-1'

Aggregate ALB requests by IP

SELECT t.client_ip,
         count(t.client_ip)
FROM 
    (SELECT client_ip,
         time
    FROM "alb"."acc"
    WHERE elb LIKE 'app/LBNAME%'
            AND time > '2018-07-01T00:00:00Z') t
GROUP BY  t.client_ip
ORDER BY  count(t.client_ip) DESC;

Find errors by time

SELECT request_timestamp,
        request_ip,
        url,
        request_verb,
        elb_status_code,
        user_agent
FROM "db"."alb_logs"
WHERE time > '2018-05-24T08:45:00.00Z'
        AND time < '2018-05-24T09:30:00.00Z'
        AND elb_status_code LIKE '5%' limit 100

Group by errors

SELECT count(elb_status_code) as count, url
FROM db.elb_logs
WHERE time > '2022-01-24T08:45:00.00Z'
        AND time < '2022-01-24T09:05:00.00Z'
        AND elb_status_code LIKE '5%'
group by url
order by count desc

Some extra information about number of requests, request size, for calculation if CloudFront can safe money. Adjust according to your private agreement

SELECT sum(sent_bytes) AS sent_bytes,
         count(*) AS request_count,
         sum(sent_bytes)/count(*) AS average_bytes,
         sum(sent_bytes)/(100*1024) AS free_requests,
         (count(*) - sum(sent_bytes)/(100*1024)) as payed_requests
         ((count(*) - sum(sent_bytes)/(100*1024))/count(*))*100.0 as payed_requests_prcnt
FROM "db"."alb_logs"

Retrieve the ten most used domain names since a certain day, excluding CloudFront requests.

SELECT count(hostname) AS count,
     hostname
FROM 
    (SELECT regexp_extract(request_url,
         '(//)([^:]+):', 2) AS hostname, request_url
    FROM "alb"."ntv_web_prod"
    WHERE user_agent != 'Amazon CloudFront'
            AND domain_name = '-'
            AND time > '2018-12-17T00:00:00Z')t
GROUP BY  t.hostname
ORDER BY  count(t.hostname) DESC limit 10
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment