Skip to content

Instantly share code, notes, and snippets.

@zakx
Created January 4, 2014 00:28
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save zakx/8249581 to your computer and use it in GitHub Desktop.
Save zakx/8249581 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# setup: pip install requests beautifulsoup4
from decimal import Decimal
import requests
from bs4 import BeautifulSoup
import sys
import getpass
# Endpoints and request settings for the amazon.de order history scraper.
ORDER_HISTORY_URL = "https://www.amazon.de/gp/css/order-history/"
SIGNIN_URL = "https://www.amazon.de/ap/signin"
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.3 Safari/537.36"
# Step between successive startIndex values when paging through a filter.
PAGE_SIZE = 10
# Only html.parser is guaranteed by the documented setup (requests + bs4),
# so name it explicitly instead of letting bs4 pick whatever is installed.
HTML_PARSER = "html.parser"


def parse_price(text):
    """Convert a price label such as ``"EUR 1.234,56"`` into a Decimal.

    The first four characters (the currency prefix, e.g. ``"EUR "``) are
    dropped, ``.`` thousands separators are removed and the decimal comma
    is turned into a decimal point.  Without removing the thousands
    separator, any price of 1.000,00 or more made ``Decimal`` raise
    ``InvalidOperation``.
    """
    return Decimal(text[4:].replace(".", "").replace(",", "."))


def history_page_url(scope, start_index):
    """Return the order history URL for filter *scope* at *start_index*."""
    return "%s?ie=UTF8&orderFilter=%s&startIndex=%d" % (
        ORDER_HISTORY_URL, scope, start_index)


def sum_prices(soup):
    """Add up every ``<span class="price">`` element found in *soup*."""
    prices = (parse_price(tag.text) for tag in soup.find_all('span', 'price'))
    return sum(prices, Decimal("0.00"))


def main():
    """Log in to amazon.de and print the order total for each history filter.

    Prompts for the credentials, signs in, walks every entry of the
    ``orderFilter`` drop-down page by page and prints one sum per filter,
    followed by a grand total over the filters whose value starts with
    ``"year"``.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    username = read_line("Username: ")
    password = getpass.getpass("Password: ")

    # Session setup
    session = requests.Session()
    session.headers['User-Agent'] = USER_AGENT

    # Request login page; its hidden form fields are sent back on sign-in.
    login_r = session.get(ORDER_HISTORY_URL)
    login = BeautifulSoup(login_r.content, HTML_PARSER)
    payload = {'email': username, 'password': password}
    for field in login.find_all("input", type="hidden"):
        field_name = field.attrs.get('name')
        if field_name:
            # A hidden input may lack a value attribute; submit it empty.
            payload[field_name] = field.attrs.get('value', '')

    # Log in
    session.post(SIGNIN_URL, data=payload, allow_redirects=False)

    # Request order history
    hist_r = session.get(ORDER_HISTORY_URL)
    soup = BeautifulSoup(hist_r.content, HTML_PARSER)
    filter_select = soup.find('select', id='orderFilter')
    if filter_select is None:
        # Without this check the script died with an AttributeError on None.
        sys.exit("[-] Order filter list not found - the login probably "
                 "failed or the page layout changed.")
    # The first child of the drop-down is skipped, as in the original script.
    filters = [option.attrs['value']
               for option in filter_select.findChildren()[1:]]
    print("[+] Found %d filters, processing..." % len(filters))

    total = Decimal("0.00")
    total_orders = 0
    for scope in filters:
        scope_r = session.get(history_page_url(scope, 0))
        scope_soup = BeautifulSoup(scope_r.content, HTML_PARSER)
        try:
            length = int(scope_soup.find('div', 'num-results').b.text)
        except AttributeError:
            # No result counter on the page: treat the filter as empty.
            length = 0
        sys.stdout.write("[+] Processing %s \t(%s orders)... " % (scope.rjust(9), str(length).rjust(4)))
        sys.stdout.flush()

        scope_sum = sum_prices(scope_soup)
        # Remaining pages start at 10, 20, ...; stop before `length` so an
        # order count that is a multiple of 10 does not fetch an empty page.
        for start_index in range(PAGE_SIZE, length, PAGE_SIZE):
            scope_page_r = session.get(history_page_url(scope, start_index))
            scope_sum += sum_prices(
                BeautifulSoup(scope_page_r.content, HTML_PARSER))
        print("\t%s EUR" % (str(scope_sum).rjust(10)))

        # Only yearly filters count towards the grand total.
        if scope.startswith("year"):
            total += scope_sum
            total_orders += length

    print("[+] Grand total (years only) \t(%s orders)... \t%s EUR" % (str(total_orders).rjust(4), str(total).rjust(10)))


if __name__ == "__main__":
    main()
@ixs
Copy link

ixs commented Jan 4, 2014

Das ranzige Script mal weiter ranzig aufgebohrt, sodass es auch amazon.com und amazon.co.uk kann...

--- damazon.py  2014-01-04 14:28:24.848546800 +0100
+++ ../damazon.py   2014-01-04 14:29:53.874448500 +0100
@@ -8,6 +8,16 @@
 import sys
 import getpass

+host = raw_input("Amazon Host: (default: www.amazon.de) ")
+if not host:
+   host = 'www.amazon.de'    
+if host.endswith('.com'):
+   currency = 'USD'
+elif host.endswith('.co.uk'):
+   currency = 'GBP'
+else:
+   currency = 'EUR'
+
 username = raw_input("Username: ")
 password = getpass.getpass("Password: ")
 # Session setup
@@ -15,7 +25,7 @@
 session.headers['User-Agent'] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.3 Safari/537.36"

 # Request login page
-login_r = session.get("https://www.amazon.de/gp/css/order-history/")
+login_r = session.get("https://%s/gp/css/order-history/" % (host))
 login = BeautifulSoup(login_r.content)

 payload = {'email': username, 'password': password}
@@ -23,10 +33,10 @@
    payload[x.attrs['name']] = x.attrs['value']

 # Log in
-order_r = session.post("https://www.amazon.de/ap/signin", data=payload, allow_redirects=False)
+order_r = session.post("https://%s/ap/signin" % (host), data=payload, allow_redirects=False)

 # Request order history
-hist_r = session.get("https://www.amazon.de/gp/css/order-history/")
+hist_r = session.get("https://%s/gp/css/order-history/" % (host))

 soup = BeautifulSoup(hist_r.content)
 filters = [options.attrs['value'] for options in soup.find('select', id='orderFilter').findChildren()[1:]]
@@ -37,7 +47,7 @@
 total_orders = 0

 for scope in filters:
-   scope_r = session.get("https://www.amazon.de/gp/css/order-history/?ie=UTF8&orderFilter=%s&startIndex=0" % scope)
+   scope_r = session.get("https://%s/gp/css/order-history/?ie=UTF8&orderFilter=%s&startIndex=0" % (host, scope))
    scope_soup = BeautifulSoup(scope_r.content)
    try:
        length = int(scope_soup.find('div', 'num-results').b.text)
@@ -48,15 +58,21 @@
    page = 10
    scope_sum = Decimal("0.00")
    for x in scope_soup.find_all('span', 'price'):
-       scope_sum += Decimal(x.text[4:].replace(",","."))
+       if host.endswith('.com') or host.endswith('.co.uk'):
+           scope_sum += Decimal(x.text[1:].replace(",",""))
+       else:
+           scope_sum += Decimal(x.text[4:].replace(".","").replace(",","."))
    while page <= length:
-       scope_page_r = session.get("https://www.amazon.de/gp/css/order-history/?ie=UTF8&orderFilter=%s&startIndex=%d" % (scope, page))
+       scope_page_r = session.get("https://%s/gp/css/order-history/?ie=UTF8&orderFilter=%s&startIndex=%d" % (host, scope, page))
        for y in BeautifulSoup(scope_page_r.content).find_all('span', 'price'):
-           scope_sum += Decimal(y.text[4:].replace(",","."))
+           if host.endswith('.com') or host.endswith('.co.uk'):
+               scope_sum += Decimal(y.text[1:].replace(",",""))
+           else:
+               scope_sum += Decimal(y.text[4:].replace(".","").replace(",","."))
        page += 10
-   print "\t%s EUR" % (str(scope_sum).rjust(10))
+   print "\t%s %s" % (str(scope_sum).rjust(10), currency)
    if scope.startswith("year"):
        total += scope_sum
        total_orders += length

-print "[+] Grand total (years only) \t(%s orders)... \t%s EUR" % (str(total_orders).rjust(4), str(total).rjust(10))
+print "[+] Grand total (years only) \t(%s orders)... \t%s %s" % (str(total_orders).rjust(4), str(total).rjust(10), currency)

@balzer82
Copy link

balzer82 commented Jul 7, 2014

Sorry, aber

Traceback (most recent call last):
  File "damazon.py", line 32, in <module>
    filters = [options.attrs['value'] for options in soup.find('select', id='orderFilter').findChildren()[1:]]
AttributeError: 'NoneType' object has no attribute 'findChildren'

@Cybso
Copy link

Cybso commented Jul 7, 2014

Es liegt am username. Gebe ich den direkt im Programm ein, dann funktioniert es. Die Ausgabe des Username mit print ist aber in beiden Fällen binär identisch. Da fuck...?

Edit: Entweder war ich mehrmals zu blöd, meine Daten korrekt einzugeben, oder der Abruf der History ist nicht völlig zuverlässig. Bei weiteren Versuchen einige Zeit später funktionierte es.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment