Created
March 19, 2018 15:30
-
-
Save pistachiyoda/235ce579080e0a8b2e4b4f0dacc5e757 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import urllib.parse | |
import boto3 | |
import gzip | |
import csv | |
from datetime import datetime | |
from operator import itemgetter, attrgetter | |
def lambda_handler(event, context): | |
# 使用するバケット | |
bucket = 'BUCKET_NAME' | |
########各月の最新データへのパスをKeys[]に格納する######## | |
def json_serial(obj): | |
if isinstance(obj, datetime): | |
return obj.isoformat() | |
raise TypeError ("Type %s not serializable" % type(obj)) | |
# そのバケット内のフォルダごとの最新のオブジェクトを取得 | |
# Keysが格納されているPrefixを格納する配列 | |
Prefixes = [] | |
# 最新データへのパスを格納する配列 | |
Keys = [] | |
client = boto3.client('s3') | |
result = client.list_objects_v2(Bucket=bucket, Prefix='/billingreport/', Delimiter='/') | |
for o in result.get('CommonPrefixes'): | |
prefix = o.get('Prefix') | |
Prefixes.append(prefix) | |
for prefix in Prefixes: | |
response = client.list_objects( | |
Bucket=bucket, | |
Prefix=prefix | |
) | |
str_response = json.loads(json.dumps(response, default=json_serial)) | |
contents = str_response['Contents'] | |
#content部をLastModifiedで並び替え、最新のものを取得 | |
sorted_contents = sorted(contents,key=lambda x:x['LastModified'],reverse=True) | |
#もしlatest_contentに格納されたのがcsv.gzだったら格納、ちがったら2周目 | |
for latest_content in sorted_contents: | |
if 'csv.gz' in latest_content['Key']: | |
Keys.append(latest_content['Key']) | |
break | |
else: | |
continue | |
########Keys[]にパスが格納された各月のデータをトリムしてマージ | |
client = boto3.client('s3') | |
fieldnames = [ | |
'lineItem/UsageStartDate', | |
'lineItem/ProductCode', | |
'lineItem/BlendedCost', | |
'lineItem/UsageType', | |
'lineItem/CurrencyCode', | |
'lineItem/LineItemDescription', | |
'product/productFamily', | |
'product/region', | |
'resourceTags/user:Cost' | |
] | |
try: | |
with open('/tmp/processed.csv', 'w') as f: | |
writer = csv.DictWriter(f,fieldnames=fieldnames) | |
writer.writeheader() | |
for key in Keys: | |
response = client.get_object(Bucket=bucket, Key=key) | |
gz = gzip.GzipFile(fileobj=response['Body']) | |
with open('/tmp/origin.csv', 'w') as f: | |
f.write(gz.read().decode('utf-8')) | |
with open('/tmp/origin.csv') as f: | |
reader = csv.DictReader(f) | |
with open('/tmp/processed.csv','a') as csvfile: | |
writer = csv.DictWriter(csvfile,fieldnames=fieldnames) | |
for row in reader: | |
writer.writerow( | |
{ | |
'lineItem/UsageStartDate': row['lineItem/UsageStartDate'], | |
'lineItem/ProductCode': row['lineItem/ProductCode'], | |
'lineItem/BlendedCost': row['lineItem/BlendedCost'], | |
'lineItem/CurrencyCode': row['lineItem/CurrencyCode'], | |
'lineItem/UsageType': row['lineItem/UsageType'], | |
'lineItem/LineItemDescription': row['lineItem/LineItemDescription'], | |
'product/productFamily': row['product/productFamily'], | |
'product/region': row['product/region'], | |
'resourceTags/user:Cost': row['resourceTags/user:Cost'], | |
} | |
) | |
with open('/tmp/processed.csv', 'r') as f: | |
client.put_object( | |
Bucket=bucket, | |
Key='latest/marged_processed_latest.csv', | |
Body=f.read() | |
) | |
return response['ContentType'] | |
except Exception as e: | |
print(e) | |
print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket)) | |
raise e | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment