Skip to content

Instantly share code, notes, and snippets.

@Bouke
Forked from rcoup/aws_usage.py
Last active April 19, 2017 18:01
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save Bouke/5719905 to your computer and use it in GitHub Desktop.
Save Bouke/5719905 to your computer and use it in GitHub Desktop.
Catch login failures / captcha input
#!/usr/bin/env python
"""
A script to query the Amazon Web Services usage reports programmatically.
Ideally this wouldn't exist, and Amazon would provide an API we can use
instead, but hey - that's life.
Basically takes your AWS account username and password, logs into the
website as you, and grabs the data out. Always gets the 'All Usage Types'
report for the specified service.
Requirements:
* Mechanize: http://wwwsearch.sourceforge.net/mechanize/
You can install this via pip/easy_install
Run with -h to see the available options.
"""
import os
import re
import sys
import time
from datetime import date

import mechanize
# Report formats accepted by the -f/--format option; the chosen value is also
# interpolated into the name of the download button submitted by get_report().
FORMATS = (
    'xml',
    'csv',
)

# Granularities accepted by the -p/--period option.
PERIODS = (
    'hours',
    'days',
    'months',
)

# Product codes accepted by the -s/--service option.
SERVICES = (
    'AmazonS3',
    'AmazonEC2',
    'AmazonCloudFront',
    'AmazonSimpleDB',
    'AWSQueueService',
    'IngestionService',
    'AmazonVPC',
)

# Page that get_report() opens first; it is expected to present the sign-in
# form and, once logged in, the usage report form.
FORM_URL = "https://aws-portal.amazon.com/gp/aws/developer/account/index.html?ie=UTF8&action=usage-report"
# Title of the AWS sign-in page; seeing it after a submit means login failed.
_SIGN_IN_TITLE = 'Amazon Web Services Sign In'


def _reload_without_doctype(br, resp):
    """Strip the DOCTYPE declaration from *resp* and hand it back to *br*.

    The original script notes that mechanize's form parser raises a
    ParseError on the DOCTYPE string these pages contain, so it has to be
    removed before any form on the page can be selected.
    """
    resp.set_data(re.sub('<!DOCTYPE(.*)>', '', resp.get_data()))
    br.set_response(resp)


def _login_error_message(html):
    """Return the warning text shown on a failed sign-in page.

    Falls back to a generic message when the page carries no
    ``message_warning`` block, instead of failing on a missing match.
    """
    match = re.search(r'\<div id="message_warning"(.+?)\<p\>(.+?)\</p\>',
                      html, re.DOTALL)
    if match is None:
        return "unknown error (no warning message found on the sign-in page)"
    return match.group(2)


def get_report(service, date_from, date_to, username, password, format='csv', period='days', debug=False):
    """Log in to the AWS portal and download a usage report.

    Parameters:
        service: product code selected in the usage report form
            (see SERVICES).
        date_from, date_to: objects with ``year``/``month``/``day``
            attributes bounding the custom date range.
        username, password: AWS account credentials typed into the
            sign-in form.
        format: report format; becomes the suffix of the download button
            name (see FORMATS).
        period: value selected for the ``periodType`` field (see PERIODS).
        debug: when true, enable mechanize's redirect/response/HTTP logging.

    Returns:
        The body of the downloaded report, as returned by ``resp.read()``.

    Raises:
        Exception: if the sign-in page is still shown after the captcha
            retry, or if login failed and no captcha URL could be found.
        Any error raised by mechanize during the initial login is re-raised
        after a short message on stderr.

    All progress and prompt text goes to stderr so that stdout carries only
    the report when the caller prints the return value.
    """
    br = mechanize.Browser()
    br.set_handle_robots(False)
    if debug:
        # Log information about HTTP redirects and Refreshes.
        br.set_debug_redirects(True)
        # Log HTTP response bodies (ie. the HTML, most of the time).
        br.set_debug_responses(True)
        # Print HTTP headers.
        br.set_debug_http(True)
    br.addheaders = [
        # the login process 404s if you leave Python's UA string
        ('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1) Gecko/20090701 Ubuntu/9.04 (jaunty) Firefox/3.5'),
        ('Accept', 'text/html, application/xml, */*'),
    ]

    # login
    sys.stderr.write("Logging in...\n")
    try:
        resp = br.open(FORM_URL)
        _reload_without_doctype(br, resp)
        br.select_form(name="signIn")
        br["email"] = username
        br["password"] = password
        resp = br.submit()  # submit current form
    except Exception:
        sys.stderr.write("Error logging in to AWS\n")
        raise
    _reload_without_doctype(br, resp)

    if br.title() == _SIGN_IN_TITLE:
        # Still on the sign-in page: report why, then retry once with the
        # captcha the page offers.
        html = resp.get_data()
        sys.stderr.write("Error logging in to AWS: %s\n" % _login_error_message(html))
        captcha = re.search('(https://capi-na.ssl-images-amazon.com/([^"]+))"', html)
        if captcha is None:
            raise Exception('Login failed and no captcha was offered')
        sys.stderr.write('Please enter the captcha as shown on the following url: \n')
        sys.stderr.write(captcha.group(1) + '\n')
        br.select_form(name='signIn')
        br['email'] = username
        br['password'] = password
        # Prompt on stderr rather than through raw_input's own prompt so the
        # text never ends up in redirected report output.
        sys.stderr.write('Captcha: ')
        br['guess'] = raw_input()
        resp = br.submit()
        _reload_without_doctype(br, resp)
        if br.title() == _SIGN_IN_TITLE:
            sys.stderr.write("Error logging in to AWS: %s\n" % _login_error_message(resp.get_data()))
            raise Exception('Still failed to login')

    # service selector
    sys.stderr.write("Selecting service %s...\n" % service)
    br.select_form(name="usageReportForm")
    br["productCode"] = [service]
    resp = br.submit()

    # report selector
    sys.stderr.write("Building report...\n")
    br.select_form(name="usageReportForm")
    br["timePeriod"] = ["Custom date range"]
    br["startYear"] = [str(date_from.year)]
    br["startMonth"] = [str(date_from.month)]
    br["startDay"] = [str(date_from.day)]
    br["endYear"] = [str(date_to.year)]
    br["endMonth"] = [str(date_to.month)]
    br["endDay"] = [str(date_to.day)]
    br["periodType"] = [period]
    # The format is chosen by which download button is "clicked".
    resp = br.submit("download-usage-report-%s" % format)
    return resp.read()
if __name__ == "__main__":
    # Command-line entry point: parse options, validate them, fetch the
    # report and print it to stdout.
    from optparse import OptionParser

    USAGE = (
        "Usage: %prog [options] -s SERVICE DATE_FROM DATE_TO\n\n"
        "DATE_FROM and DATE_TO should be in YYYY-MM-DD format (eg. 2009-01-31)\n"
        "Username and Password can also be specified via AWS_USERNAME and AWS_PASSWORD environment variables.\n"
        "\n"
        "Available Services: " + ', '.join(SERVICES)
    )
    parser = OptionParser(usage=USAGE)
    parser.add_option('-s', '--service', dest="service", type="choice", choices=SERVICES, help="The AWS service to query")
    parser.add_option('-p', '--period', dest="period", type="choice", choices=PERIODS, default='days', metavar="PERIOD", help="Period of report entries")
    parser.add_option('-f', '--format', dest="format", type="choice", choices=FORMATS, default='csv', metavar="FORMAT", help="Format of report")
    parser.add_option('-U', '--username', dest="username", metavar="USERNAME", help="Email address for your AWS account")
    parser.add_option('-P', '--password', dest="password", metavar="PASSWORD")
    parser.add_option('-d', '--debug', action="store_true", dest="debug", default=False)
    opts, args = parser.parse_args()

    if len(args) < 2:
        parser.error("Missing date range")
    try:
        # Only the first two positional arguments are used as the range.
        date_range = [date(*time.strptime(arg, '%Y-%m-%d')[0:3]) for arg in args[:2]]
    except ValueError:
        # Report a malformed date as a usage error, not a traceback.
        parser.error("Dates must be in YYYY-MM-DD format (eg. 2009-01-31)")
    if date_range[1] < date_range[0]:
        parser.error("End date < start date")
    if not opts.service:
        parser.error("Specify a service to query!")

    # Command-line options take precedence over the environment.
    username = opts.username or os.environ.get('AWS_USERNAME')
    password = opts.password or os.environ.get('AWS_PASSWORD')
    if not username:
        parser.error("Must specify username option or set AWS_USERNAME")
    if not password:
        parser.error("Must specify password option or set AWS_PASSWORD")

    kwopts = {
        'service': opts.service,
        'date_from': date_range[0],
        'date_to': date_range[1],
        'format': opts.format,
        'period': opts.period,
        'username': username,
        'password': password,
        'debug': opts.debug,
    }
    print(get_report(**kwopts))
@sgessa
Copy link

sgessa commented Jun 17, 2013

It's not working for me!

I've replaced all 'browser' entries with 'br'.
Looks like browser variable is not being instantiated anywhere.

Without the -t switch now I'm getting:

Please enter the captcha as shown on the following url: 

https://capi-na.ssl-images-amazon.com/blablabla....

Captcha: fptkp6

Error logging in to AWS: Traceback (most recent call last):
  File "./aws_usage2.py", line 172, in
    print get_report(**kwopts)
  File "./aws_usage2.py", line 104, in get_report
    print >>sys.stderr, "Error logging in to AWS: ", error.group(2)
AttributeError: 'NoneType' object has no attribute 'group'

Could you please help me? Thank you

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment