Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import json
import urllib.parse
import boto3
import gzip
import csv
from datetime import datetime
from operator import itemgetter, attrgetter
def lambda_handler(event, context):
    """Merge the newest monthly billing report from each month folder into one CSV.

    For every month prefix under ``/billingreport/`` in the bucket, this finds
    the most recently modified ``csv.gz`` report, extracts a fixed subset of
    columns from every row, merges them into a single CSV, and uploads the
    result to ``latest/marged_processed_latest.csv`` in the same bucket.

    Returns the ContentType of the last report object fetched (original
    behavior), or None when no report keys were found.
    """
    # Bucket holding the Cost & Usage reports (placeholder name).
    bucket = 'BUCKET_NAME'
    client = boto3.client('s3')

    # ---- Step 1: collect the latest csv.gz key for each month prefix ----
    # NOTE(review): a leading '/' in an S3 prefix is unusual — objects whose
    # keys literally start with '/' are rare. Kept as-is since the actual
    # bucket layout is not visible here; confirm against the bucket.
    result = client.list_objects_v2(
        Bucket=bucket, Prefix='/billingreport/', Delimiter='/')

    keys = []  # one latest-report key per month prefix
    for common in result.get('CommonPrefixes') or []:
        prefix = common.get('Prefix')

        # Gather every object under this month's prefix. Paginate so that
        # prefixes with more than 1000 objects are not silently truncated
        # (the original unpaginated list_objects call stopped at 1000).
        contents = []
        paginator = client.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            contents.extend(page.get('Contents', []))

        # boto3 returns LastModified as a datetime, which sorts directly —
        # the original JSON serialize/deserialize round trip (and its
        # json_serial helper) was unnecessary.
        contents.sort(key=lambda c: c['LastModified'], reverse=True)

        # Take the newest object that is a csv.gz report; skip anything else.
        for content in contents:
            if 'csv.gz' in content['Key']:
                keys.append(content['Key'])
                break

    # ---- Step 2: trim each monthly report to these columns and merge ----
    fieldnames = [
        'lineItem/UsageStartDate',
        'lineItem/ProductCode',
        'lineItem/BlendedCost',
        'lineItem/UsageType',
        'lineItem/CurrencyCode',
        'lineItem/LineItemDescription',
        'product/productFamily',
        'product/region',
        'resourceTags/user:Cost'
    ]

    # Pre-bind so the error message below never hits a NameError when the
    # exception fires before (or without) entering the loop.
    key = None
    response = None
    try:
        # newline='' is the documented way to open files for the csv module.
        with open('/tmp/processed.csv', 'w', newline='') as out:
            # extrasaction='ignore' drops every column not in fieldnames,
            # which is exactly the per-column copy the original did by hand.
            writer = csv.DictWriter(out, fieldnames=fieldnames,
                                    extrasaction='ignore')
            writer.writeheader()
            for key in keys:
                response = client.get_object(Bucket=bucket, Key=key)
                # Stream-decompress the gzipped report directly; no
                # intermediate /tmp/origin.csv file is needed.
                with gzip.open(response['Body'], mode='rt',
                               encoding='utf-8') as report:
                    for row in csv.DictReader(report):
                        writer.writerow(row)

        # Upload the merged CSV back to the bucket.
        with open('/tmp/processed.csv', 'r') as f:
            client.put_object(
                Bucket=bucket,
                Key='latest/marged_processed_latest.csv',
                Body=f.read()
            )
        # Original behavior: return the ContentType of the last get_object
        # response; None if there were no report keys at all.
        return response['ContentType'] if response else None
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise e
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment