Skip to content

Instantly share code, notes, and snippets.

@sakamaki-kazuyoshi
Last active September 3, 2019 22:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sakamaki-kazuyoshi/9300f2b527c6916cef7cf0124ed264c1 to your computer and use it in GitHub Desktop.
Save sakamaki-kazuyoshi/9300f2b527c6916cef7cf0124ed264c1 to your computer and use it in GitHub Desktop.
CloudFront Access Logs Conversion
<!DOCTYPE html>
<!--
  Minimal placeholder page: the body is a single "test" paragraph and the
  title is the generic "Document".
  NOTE(review): presumably uploaded as index.html to the contents bucket to
  generate CloudFront traffic; confirm with the author.
-->
<html lang="en">
<head>
<!-- Declare the document encoding before any text content appears. -->
<meta charset="UTF-8">
<!-- Lay the page out at device width with no initial zoom. -->
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- Ask legacy Internet Explorer to use its newest rendering mode. -->
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
</head>
<body>
<p>test</p>
</body>
</html>
import codecs
import csv
import gzip
import logging
import os
import re
import tempfile
import urllib.parse

import boto3
# AWS handles are created at import time, outside the handler, so every
# invocation served by this module instance shares them.
s3 = boto3.resource('s3')
s3_client = boto3.client('s3')

# Root logger. The level defaults to ERROR (the previous hard-coded value) but
# can be overridden with the optional LOG_LEVEL environment variable, e.g.
# LOG_LEVEL=INFO, so the handler's info-level event dump can be enabled
# without a code change. Unknown level names fall back to ERROR.
logger = logging.getLogger()
_log_level = logging.getLevelName(os.environ.get('LOG_LEVEL', 'ERROR').upper())
logger.setLevel(_log_level if isinstance(_log_level, int) else logging.ERROR)
def lambda_handler(event, context):
    """Convert newly delivered CloudFront access logs from gzipped TSV to CSV.

    For every S3 record in ``event`` the referenced ``<folder>/<name>.gz``
    object is downloaded, decompressed, rewritten as CSV and uploaded to
    ``<S3_OUTPUT_FOLDER>/<folder>/<name>.csv`` in the same bucket.

    Args:
        event: S3 event notification payload (``event['Records'][i]['s3']``).
        context: Lambda context object; unused.

    Raises:
        KeyError: If the ``S3_OUTPUT_FOLDER`` environment variable is not set
            or the event does not have the S3 notification shape.
    """
    logger.info('## event')
    logger.info(event)

    # Destination folder name, supplied through the Lambda environment.
    output_folder = os.environ['S3_OUTPUT_FOLDER']

    # A notification may carry several records; handle each of them.
    for record in event['Records']:
        source_bucket = record['s3']['bucket']['name']
        # Object keys arrive URL-encoded in S3 event notifications.
        source_key = urllib.parse.unquote_plus(record['s3']['object']['key'])
        _convert_log_object(source_bucket, source_key, output_folder)


def _convert_log_object(bucket, key, output_folder):
    """Download one gzipped log object, convert it, and upload the CSV."""
    parts = key.split('/')
    cf_log_name_gz = parts[-1]

    # Anything that is not "<folder>/<name>.gz" is skipped. This also stops
    # the function from reprocessing its own .csv output when the bucket
    # trigger is not restricted by prefix or suffix.
    if len(parts) < 2 or not cf_log_name_gz.endswith('.gz'):
        logger.warning('Skipping %s: not a <folder>/<name>.gz object', key)
        return

    # The folder directly above the log file is mirrored under output_folder.
    lower_folder_name = parts[-2]
    cf_log_name_csv = cf_log_name_gz[:-len('.gz')] + '.csv'

    # os.path.join yields "/"-separated keys on the Linux Lambda runtime.
    s3_object_key = os.path.join(output_folder, lower_folder_name)
    full_s3_object_key = os.path.join(s3_object_key, cf_log_name_csv)

    # A per-object scratch directory replaces os.chdir('/tmp'): there is no
    # process-wide side effect, and the files are removed on exit so repeated
    # invocations do not fill the function's temporary storage.
    with tempfile.TemporaryDirectory() as work_dir:
        gz_path = os.path.join(work_dir, cf_log_name_gz)
        csv_path = os.path.join(work_dir, cf_log_name_csv)

        s3_client.download_file(bucket, key, gz_path)
        _convert_tsv_gz_to_csv(gz_path, csv_path)

        # Create the zero-byte "folder" marker only when nothing exists under
        # the destination prefix yet. The trailing slash keeps sibling
        # prefixes such as "<folder>2/" from matching.
        listing = s3_client.list_objects_v2(
            Bucket=bucket, Prefix=s3_object_key + '/', MaxKeys=1
        )
        if not listing.get('Contents'):
            s3_client.put_object(Bucket=bucket, Key=s3_object_key + '/')

        s3_client.upload_file(csv_path, bucket, full_s3_object_key)


def _convert_tsv_gz_to_csv(gz_path, csv_path):
    """Stream a gzipped tab-separated file into a comma-separated file."""
    # newline='' on both handles leaves line-ending handling to the csv module.
    with gzip.open(gz_path, mode='rt', encoding='utf-8', newline='') as src, \
            open(csv_path, mode='w', encoding='utf-8', newline='') as dst:
        # QUOTE_NONE on input: split purely on tabs and treat quote characters
        # as ordinary data.
        rows = csv.reader(src, delimiter='\t', quoting=csv.QUOTE_NONE)
        # The writer's default minimal quoting wraps fields that contain a
        # comma, so such values no longer shift the columns.
        csv.writer(dst, lineterminator='\n').writerows(rows)
# Serverless Framework definition for the CloudFront access-log conversion
# Lambda function.
service: AccessLogConversion

custom:
  LogBucketName: cloudfront-accesslog-xxxxxxxxxxxx  # Bucket that receives the CloudFront logs
  OutPutFolderName: ungzip  # Destination folder for the converted files

provider:
  name: aws
  # NOTE(review): python3.7 is an old Lambda runtime; confirm it is still
  # deployable and bump if not.
  runtime: python3.7
  region: ap-northeast-1
  stage: v1
  # Least privilege: the handler lists the destination prefix, downloads the
  # source object, and writes the folder marker and the converted file.
  iamRoleStatements:
    - Effect: "Allow"
      Action:
        - "s3:ListBucket"
      Resource:
        - "arn:aws:s3:::${self:custom.LogBucketName}"
    - Effect: "Allow"
      Action:
        - "s3:GetObject"
        - "s3:PutObject"
      Resource:
        - "arn:aws:s3:::${self:custom.LogBucketName}/*"

functions:
  ExpandCloudFrontLog:
    handler: lambda_function.lambda_handler
    name: AccessLogConversion
    timeout: 300
    memorySize: 128
    environment:
      # Read by the handler as os.environ['S3_OUTPUT_FOLDER'].
      S3_OUTPUT_FOLDER: ${self:custom.OutPutFolderName}
AWSTemplateFormatVersion: 2010-09-09
Description: CloudFront Distribution with a S3 Static Website Hosting

Parameters:
  SystemName:
    Type: String
    Description: SystemName of your website.
    Default: 'systemname'
  CloudFrontLoggingPrefix:
    Type: String
    Description: CloudFront Access Logs output bucket prefix.
    Default: 'original/neko.tk'

Resources:
  # Bucket that receives the CloudFront access logs; objects expire after a year.
  LogBucket:
    Type: 'AWS::S3::Bucket'
    DeletionPolicy: Retain
    Properties:
      BucketName: !Sub ${SystemName}-log-bucket-${AWS::AccountId}
      LifecycleConfiguration:
        Rules:
          - Id: Delete-After-365days
            Status: Enabled
            ExpirationInDays: 365
      # ACLs must be enabled on the bucket for the canned ACL below to be
      # accepted and for CloudFront log delivery to work.
      OwnershipControls:
        Rules:
          - ObjectOwnership: BucketOwnerPreferred
      AccessControl: 'LogDeliveryWrite'

  # Origin bucket holding the site contents.
  # NOTE(review): "origne" in the bucket name looks like a typo for "origin".
  # It is left unchanged because renaming would replace the bucket.
  ContentsBucket:
    Type: 'AWS::S3::Bucket'
    DeletionPolicy: Retain
    Properties:
      BucketName: !Sub ${SystemName}-origne-bucket-${AWS::AccountId}
      VersioningConfiguration:
        Status: Enabled

  # Allows only the CloudFront origin access identity to read the contents.
  ContentsBucketPolicy:
    Type: 'AWS::S3::BucketPolicy'
    Properties:
      Bucket: !Ref ContentsBucket
      PolicyDocument:
        Statement:
          - Action: s3:GetObject
            Effect: Allow
            Resource: !Sub arn:aws:s3:::${ContentsBucket}/*
            Principal:
              AWS: !Sub arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${CloudFrontOriginAccessIdentity}

  ContentsDistribution:
    Type: 'AWS::CloudFront::Distribution'
    Properties:
      DistributionConfig:
        Origins:
          - Id: S3Origin
            DomainName: !GetAtt ContentsBucket.DomainName
            S3OriginConfig:
              OriginAccessIdentity: !Sub origin-access-identity/cloudfront/${CloudFrontOriginAccessIdentity}
        Enabled: true
        DefaultRootObject: index.html
        DefaultCacheBehavior:
          TargetOriginId: S3Origin
          ForwardedValues:
            QueryString: false
          ViewerProtocolPolicy: redirect-to-https
        # Access logs are written to LogBucket under the configured prefix.
        Logging:
          Bucket: !GetAtt LogBucket.DomainName
          Prefix: !Ref CloudFrontLoggingPrefix
        HttpVersion: 'http2'

  CloudFrontOriginAccessIdentity:
    Type: AWS::CloudFront::CloudFrontOriginAccessIdentity
    Properties:
      CloudFrontOriginAccessIdentityConfig:
        Comment: !Sub ${SystemName}-oai

Outputs:
  ContentsBucket:
    Value: !Ref ContentsBucket
    Export:
      Name: !Sub ${SystemName}-ContentsBucket
  ContentsDistribution:
    Value: !Ref ContentsDistribution
    Export:
      Name: !Sub ${SystemName}-ContentsDistribution
  URL:
    # Viewers are redirected to HTTPS, so publish the HTTPS URL directly.
    Value:
      !Join ['', ['https://', !GetAtt [ContentsDistribution, DomainName]]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment