Last active
October 28, 2020 06:41
-
-
Save shantanuo/d2ab8abe85e14c930c991d48661faafe to your computer and use it in GitHub Desktop.
S3 report analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the report from "Billing dashboard" -> "Reports" -> "AWS Usage Report".
# In the "Service" dropdown, select "Amazon Simple Storage Service", then "daily", then "Download report CSV".
import pandas as pd
from datetime import date, timedelta

# Only yesterday's rows of the usage report are of interest.
yesterday = date.today() - timedelta(1)

# StartTime is compared as a raw string, so this format has to match the CSV
# text exactly.  An earlier revision of this script used the format
# '%m/%d/%Y 0:00' instead -- presumably some exports are written that way;
# check your file if the filter below matches no rows.
mydate = yesterday.strftime('%m/%d/%y 00:00:00')

df = pd.read_csv('report_daily.csv')
# Strip surrounding whitespace from the header names before looking columns up.
df.columns = df.columns.str.strip()

is_yesterday = df['StartTime'] == mydate
is_object_count = df['UsageType'] == 'StorageObjectCount'

# Maximum UsageValue per Resource (presumably the bucket name -- verify
# against the report), pivoted so that each Operation becomes a column.
# Resource/Operation combinations that do not occur are filled with the
# number 0 rather than the string '0', so the table stays numeric.
report = (
    df[is_yesterday & is_object_count]
    .groupby(['Resource', 'Operation'])['UsageValue']
    .agg(['max'])
    .unstack()
    .fillna(0)
)

# Print explicitly: a bare expression is only displayed inside a notebook.
print(report)
########################## updated version ################################### | |
import pandas as pd
from datetime import date, timedelta

# Only yesterday's rows of the usage report are of interest.
yesterday = date.today() - timedelta(1)

df = pd.read_csv('report_today.csv')
# Strip surrounding whitespace from the header names before looking columns up.
df.columns = df.columns.str.strip()

# Parse StartTime into real timestamps so the filter does not depend on how
# the CSV formats its dates.  Item assignment is used instead of
# `df.StartTime = ...`, which only works when the column already exists.
df['StartTime'] = pd.to_datetime(df['StartTime'])

# pd.Timestamp(<date>) is midnight of that day, so only rows whose StartTime
# is exactly 00:00:00 yesterday are selected.
# NOTE(review): `yesterday` is computed in local time; confirm it lines up
# with the time zone used by the report.
is_yesterday = df['StartTime'] == pd.Timestamp(yesterday)
is_object_count = df['UsageType'] == 'StorageObjectCount'

# Maximum UsageValue per Resource (presumably the bucket name -- verify
# against the report), pivoted so that each Operation becomes a column.
# Resource/Operation combinations that do not occur are filled with the
# number 0 rather than the string '0', so the table stays numeric.
report = (
    df[is_yesterday & is_object_count]
    .groupby(['Resource', 'Operation'])['UsageValue']
    .agg(['max'])
    .unstack()
    .fillna(0)
)

# Print explicitly: a bare expression is only displayed inside a notebook.
print(report)
################################## update 2 ############################# | |
#### shell script | |
#!/bin/sh
# Write a recursive object listing of every S3 bucket returned by `aws s3 ls`
# into one text file per bucket: <outdir>/<bucket>.txt
#
# Usage: sh <this-script> [outdir]
# The default output directory is /tmp/newtemp because the Python loader in
# this file globs /tmp/newtemp/*.txt; the original relative "newtemp/" only
# matched that path when the script happened to be run from /tmp.
outdir="${1:-/tmp/newtemp}"

# -p: do not fail when the directory is left over from a previous run.
mkdir -p "$outdir" || exit 1

# Field 3 of each `aws s3 ls` line (split on single spaces) is taken as the
# bucket name.
for bucket in $(aws s3 ls | cut -d ' ' -f 3)
do
    # Truncate (>) instead of append (>>) so that re-running the script does
    # not stack a second copy of the listing onto the previous one.
    aws s3 ls --recursive "s3://$bucket" > "$outdir/$bucket.txt"
done
#### python script | |
import glob
import os
import pandas as pd

# Column names given to the three fixed-width fields of each listing line.
LISTING_COLUMNS = ['date', 'size', 'filename']


def load_listings(pattern='/tmp/newtemp/*.txt'):
    """Load per-bucket listing files into a single DataFrame.

    Every file matching *pattern* is read as fixed-width text with field
    widths 20, 11 and 500 (columns ``date``, ``size``, ``filename``).  A
    ``bucketname`` column is added, holding the file's base name without
    its final extension.

    Args:
        pattern: Glob pattern selecting the listing files.

    Returns:
        The concatenation of all listings, or an empty DataFrame with the
        same four columns when no file matches or every file is empty.
    """
    frames = []
    # Sorted so the row order does not depend on directory iteration order.
    for path in sorted(glob.glob(pattern)):
        # splitext drops only the trailing ".txt"; split('.')[0] would cut a
        # dotted name such as "my.bucket.example.txt" down to "my".
        bucket = os.path.splitext(os.path.basename(path))[0]
        try:
            listing = pd.read_fwf(
                path, widths=[20, 11, 500], names=LISTING_COLUMNS
            )
        except pd.errors.EmptyDataError:
            # An empty listing file has no rows to contribute.
            continue
        listing['bucketname'] = bucket
        frames.append(listing)

    if not frames:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame(columns=LISTING_COLUMNS + ['bucketname'])
    return pd.concat(frames)


df = load_listings('/tmp/newtemp/*.txt')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment