Created
March 21, 2018 04:54
-
-
Save toddlers/112707afa5c1f37cf1bd9c2b9d6eb2a5 to your computer and use it in GitHub Desktop.
s3logs_analysis via Athena
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Create a database in Athena */ | |
CREATE DATABASE s3_AccessLogsDB; | |
/* | |
Create a table within the Athena database | |
Replace s3://MY_S3_BUCKET/ with your S3 bucket | |
which has the access logs you want to query | |
*/ | |
CREATE EXTERNAL TABLE IF NOT EXISTS s3_AccessLogs.Accesslogs( | |
BucketOwner string, | |
Bucket string, | |
RequestDateTime string, | |
RemoteIP string, | |
Requester string, | |
RequestID string, | |
Operation string, | |
Key string, | |
RequestURI_operation string, | |
RequestURI_key string, | |
RequestURI_httpProtoversion string, | |
HTTPstatus string, | |
ErrorCode string, | |
BytesSent string, | |
ObjectSize string, | |
TotalTime string, | |
TurnAroundTime string, | |
Referrer string, | |
UserAgent string, | |
VersionId string) | |
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' | |
WITH SERDEPROPERTIES ( | |
'serialization.format' = '1', | |
'input.regex' = '([^ ]*) ([^ ]*) \\[(.*?)\\] ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) \\\"([^ ]*) ([^ ]*) (- |[^ ]*)\\\" (-|[0-9]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) (\"[^\"]*\") ([^ ]*)$' | |
) LOCATION 's3://MY_S3_BUCKET/' | |
/* | |
Query for access outside of AWS and your VPC | |
Replace 172.31% with your VPC CIDR | |
*/ | |
SELECT Key, | |
UserAgent, | |
RemoteIp, | |
count(*) AS cnt | |
FROM Accesslogs | |
WHERE regexp_like(RequestURI_operation, 'GET|HEAD') | |
AND Requester LIKE '-' | |
AND NOT regexp_like(UserAgent, 'Elastic|aws') | |
AND RemoteIp NOT LIKE '172.31%' | |
GROUP BY RemoteIp, Key, UserAgent, RemoteIp | |
ORDER BY cnt DESC LIMIT 10 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment