Skip to content

Instantly share code, notes, and snippets.

@shantanuo
Last active October 28, 2020 06:41
Show Gist options
  • Save shantanuo/d2ab8abe85e14c930c991d48661faafe to your computer and use it in GitHub Desktop.
Save shantanuo/d2ab8abe85e14c930c991d48661faafe to your computer and use it in GitHub Desktop.
S3 report analysis
# Download the report from the AWS Billing dashboard: "Reports" -> "AWS Usage Report".
# In the "Service" dropdown select "Amazon Simple Storage Service", choose "daily", click "Download report CSV", and save the file as report_daily.csv next to this script.
import pandas as pd
from datetime import date, timedelta
# Summarise yesterday's 'StorageObjectCount' usage rows from the daily S3
# usage report, one row per Resource and one column per Operation.
yesterday = date.today() - timedelta(days=1)

# StartTime is compared as plain text, so this pattern has to match the CSV
# exactly. If the export uses a four-digit year and a short time, switch to
# '%m/%d/%Y 0:00'.
mydate = yesterday.strftime('%m/%d/%y 00:00:00')

df = pd.read_csv('report_daily.csv')
# Strip whitespace around the header names so the lookups below work.
df.columns = df.columns.str.strip()

is_yesterday = df['StartTime'] == mydate
is_object_count = df['UsageType'] == 'StorageObjectCount'
object_rows = df[is_yesterday & is_object_count]

# Largest UsageValue seen for each (Resource, Operation) pair.
peak_usage = object_rows.groupby(['Resource', 'Operation'])['UsageValue'].agg(['max'])

# Left as a bare expression: presumably meant for an interactive session
# (e.g. a notebook), where the resulting table is displayed.
peak_usage.unstack().fillna('0')
########################## updated version ###################################
import pandas as pd
from datetime import date, timedelta
# Summarise yesterday's 'StorageObjectCount' usage rows from the S3 usage
# report, one row per Resource and one column per Operation.
yesterday = date.today() - timedelta(days=1)

df = pd.read_csv('report_today.csv')
# Strip whitespace around the header names so the lookups below work.
df.columns = df.columns.str.strip()
# Parse StartTime into real timestamps so the match does not depend on the
# textual date format used in the export.
df['StartTime'] = pd.to_datetime(df['StartTime'])

# pd.Timestamp(date) is midnight of that day, so only rows whose StartTime is
# exactly 00:00:00 yesterday are selected.
is_yesterday = df['StartTime'] == pd.Timestamp(yesterday)
is_object_count = df['UsageType'] == 'StorageObjectCount'

# Largest UsageValue seen for each (Resource, Operation) pair.
peak_usage = (
    df[is_yesterday & is_object_count]
    .groupby(['Resource', 'Operation'])['UsageValue']
    .agg(['max'])
)

# Fill missing Resource/Operation combinations with the number 0 rather than
# the string '0', so the table stays numeric and can be summed or sorted.
report = peak_usage.unstack().fillna(0)

# Left as a bare expression: presumably meant for an interactive session
# (e.g. a notebook), where the resulting table is displayed.
report
################################## update 2 #############################
#### shell script
#!/bin/sh
# Write a recursive object listing of every S3 bucket to newtemp/<bucket>.txt.
# NOTE: the Python step that follows reads /tmp/newtemp/*.txt, so run this
# script from /tmp (or adjust one of the two paths).

# -p: do not fail when newtemp/ is left over from an earlier run.
mkdir -p newtemp/

# The bucket name is taken from the third space-separated field of each
# `aws s3 ls` output line.
for i in $(aws s3 ls | cut -d ' ' -f 3)
do
    # Overwrite (>) instead of append (>>) so re-running the script does not
    # duplicate every object in the listing.
    aws s3 ls --recursive "s3://$i" > "newtemp/$i.txt"
done
#### python script
import glob, os
import pandas as pd
def bucket_name_from_path(path):
    """Return the bucket name encoded in a listing file path.

    Only the final extension is removed, so bucket names that themselves
    contain dots (for example ``logs.example.com``) are kept intact.
    """
    return os.path.splitext(os.path.basename(path))[0]


# Column names for the fixed-width fields read from each listing file.
LISTING_COLUMNS = ['date', 'size', 'filename']

# Load every per-bucket listing into one frame tagged with its bucket name.
d = []
for filename in sorted(glob.glob('/tmp/newtemp/*.txt')):
    # A zero-byte listing has nothing to parse; skip it instead of letting
    # the parser fail on an empty file.
    if os.path.getsize(filename) == 0:
        continue
    fname = bucket_name_from_path(filename)
    df = pd.read_fwf(filename, widths=[20, 11, 500], names=LISTING_COLUMNS)
    df['bucketname'] = fname
    d.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame with the
# same columns when no listing was loaded.
if d:
    df = pd.concat(d)
else:
    df = pd.DataFrame(columns=LISTING_COLUMNS + ['bucketname'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment