Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Quick python-mechanize script to dump ShinyLoot order history
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""Quick script to dump ShinyLoot order records before they close"""
from __future__ import (absolute_import, division, print_function,
with_statement, unicode_literals)
__author__ = "Stephan Sokolow (deitarion/SSokolow)"
__appname__ = "ShinyLoot Order Details Dumper"
__version__ = "0.1a1"
__license__ = "MIT"
import logging, mechanize, os, re, socket, time, urllib2
log = logging.getLogger(__name__)
class Scraper(object):
    """Simple class to tie together the stateful bits for scraping orders."""

    order_list_url = 'http://www.shinyloot.com/m/orders'
    # Dots are escaped so hosts like "wwwXshinylootYcom" can't slip past
    # validation (the original pattern left them as match-anything dots).
    order_id_re = re.compile(
        r'[^/]*//www\.shinyloot\.com/m/order_detail/(\d+)')
    default_timeout = 10  # seconds, passed to mechanize.Browser.open
    max_retries = 10  # bound on timeout retries (was unbounded recursion)

    def __init__(self, username, password, browser=None):
        """
        @param username: ShinyLoot account name used by L{do_login}.
        @param password: ShinyLoot password used by L{do_login}.
        @param browser: Optional pre-configured C{mechanize.Browser} to
            reuse; a fresh one is created if omitted.
        """
        self.username = username
        self.password = password
        self.browser = browser or mechanize.Browser()

    def _get_page(self, url, predicate):
        """Retrieve a page.

        Assume we've been redirected to a login form if we don't match a
        predicate.

        @param predicate: Callable of C{(browser, response, contents)}
            returning a truthy value when the expected page was received.
        @return: C{(response, contents)} tuple.
        @raises urllib2.URLError: On non-timeout network failures, or once
            C{max_retries} consecutive timeouts have been exhausted.
        """
        for _attempt in range(self.max_retries):
            log.info("Retrieving %s...", url)
            time.sleep(3)  # Hopefully this will prevent getting blocked again
            try:
                page = self.browser.open(url, timeout=self.default_timeout)
            except urllib2.URLError as err:
                if not isinstance(err.reason, socket.timeout):
                    raise
                log.warning("Timed out. Retrying...")
                continue
            contents = page.read()
            if not predicate(self.browser, page, contents):
                # Probably bounced to the login form; log in and re-fetch.
                log.info("Failed predicate. Trying to log in.")
                self.do_login()
                return self._get_page(url, predicate)
            return page, contents
        raise urllib2.URLError(
            "Timed out %d times retrieving %s" % (self.max_retries, url))

    def do_login(self):
        """Login to the current page or die.

        @raises mechanize.FormNotFoundError: If the page has no form.
        @raises AttributeError: If the form lacks the expected controls.
        """
        try:
            self.browser.select_form(nr=0)
            self.browser['username'] = self.username
            self.browser['password'] = self.password
            self.browser.submit()
        except (AttributeError, mechanize.FormNotFoundError):
            # Dump the unexpected page so the failure can be diagnosed,
            # then re-raise rather than limp along unauthenticated.
            print(self.browser.response().read())
            log.critical("Unexpected failure")
            raise

    def get_order_list(self):
        """Save the order list to file and return the detail URLs.

        @note: Will overwrite previously-saved copies, unlike L{get_details}.
        """
        _, page_contents = self._get_page(
            self.order_list_url,
            lambda _i, _j, contents: 'Order History' in contents)
        with open('orders.html', 'wb') as fobj:
            fobj.write(page_contents)
        return list(self.browser.links(
            predicate=lambda x:
                '//www.shinyloot.com/m/order_detail/' in x.url))

    def get_details(self, url):
        """Validate a detail URL, then save it to disk if not already done."""
        match = self.order_id_re.match(url)
        if not match:
            log.warning("Not an order detail URL. Skipping: %s", url)
            return
        filename = match.group(1) + '.html'
        if os.path.exists(filename):
            log.info("Already downloaded. Skipping: %s", filename)
            return
        _, page_contents = self._get_page(
            url, lambda _i, _j, contents: 'Order Detail' in contents)
        with open(filename, 'wb') as fobj:
            fobj.write(page_contents)
def main():
    """The main entry point, compatible with setuptools entry points."""
    # On Python 2, take responsibility for preventing output from raising
    # UnicodeEncodeError. (Done here, not at import time, so it only
    # happens when the script is run directly rather than imported.)
    import sys
    if sys.version_info.major < 3:
        reload(sys)
        sys.setdefaultencoding('utf-8')  # pylint: disable=no-member

    from argparse import ArgumentParser, RawTextHelpFormatter
    parser = ArgumentParser(
        formatter_class=RawTextHelpFormatter,
        description=__doc__.replace('\r\n', '\n').split('\n--snip--\n')[0])
    parser.add_argument('--version', action='version',
                        version="%%(prog)s v%s" % __version__)
    parser.add_argument('-v', '--verbose', action="count", default=2,
                        help="Increase the verbosity. "
                             "Use twice for extra effect")
    parser.add_argument('-q', '--quiet', action="count", default=0,
                        help="Decrease the verbosity. "
                             "Use twice for extra effect")
    # Reminder: %(default)s can be used in help strings.
    parser.add_argument('username')
    parser.add_argument('password')
    opts = parser.parse_args()

    # Map the net -v/-q count onto a concrete logging level, clamped to
    # the available range, and log cleanly to stderr.
    levels = (logging.CRITICAL, logging.ERROR, logging.WARNING,
              logging.INFO, logging.DEBUG)
    verbosity = max(0, min(opts.verbose - opts.quiet, len(levels) - 1))
    logging.basicConfig(level=levels[verbosity],
                        format='%(levelname)s: %(message)s')

    scraper = Scraper(opts.username, opts.password)
    for link in scraper.get_order_list():
        scraper.get_details(link.url)
    log.info("Done.")
# Script entry point: run main() only when executed directly, not imported.
if __name__ == '__main__':
    main()

# vim: set sw=4 sts=4 expandtab :
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment