Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save gnomeria/90c9632f879be5fd604f23ca70cfa884 to your computer and use it in GitHub Desktop.
Save gnomeria/90c9632f879be5fd604f23ca70cfa884 to your computer and use it in GitHub Desktop.
Athena SQL Create DB Query From S3 Access Logs
CREATE EXTERNAL TABLE IF NOT EXISTS my_athena_db_name.app1_access_logs(
BucketOwner STRING,
Bucket STRING,
RequestDateTime STRING,
RemoteIP STRING,
Requester STRING,
RequestID STRING,
Operation STRING,
Key STRING,
RequestURI_operation STRING,
RequestURI_key STRING,
RequestURI_httpProtoversion STRING,
HTTPstatus STRING,
ErrorCode STRING,
BytesSent BIGINT,
ObjectSize BIGINT,
TotalTime STRING,
TurnAroundTime STRING,
Referrer STRING,
UserAgent STRING,
VersionId STRING,
HostId STRING,
SigV STRING,
CipherSuite STRING,
AuthType STRING,
EndPoint STRING,
TLSVersion STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1', 'input.regex' = '([^ ]*) ([^ ]*) \\[(.*?)\\] ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) \\\"([^ ]*) ([^ ]*) (- |[^ ]*)\\\" (-|[0-9]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) (\"[^\"]*\") ([^ ]*)(?: ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*))?.*$' ) LOCATION 's3://mybucketname/prefix'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment