Skip to content

Instantly share code, notes, and snippets.

@seeRead
Last active January 1, 2016 20:49
Show Gist options
  • Save seeRead/8199309 to your computer and use it in GitHub Desktop.
Save seeRead/8199309 to your computer and use it in GitHub Desktop.
import json
import pprint
import re
import urllib2
import urlparse
from urlparse import urlparse

#http://blog.ianbicking.org/2008/12/10/lxml-an-underappreciated-web-scraping-library/
from lxml.html import parse, tostring, fromstring #for better css selectors than Beautiful Soup
from lxml.html.diff import htmldiff
from lxml import cssselect, etree

import pytz
from pytz import timezone
pp = pprint.PrettyPrinter(indent=4)
#set your wishlist here
wishlist = ''
#set your library catalog search url here
catalog = 'https://catalog.houstonlibrary.org/client/hou/search/results?&dt=list&qu='
(true,false,null) = (True,False,None)
items = []
#set this to working endpoint for https://github.com/doitlikejustin/amazon-wish-lister
url = 'http://wishlist.dev/wishlist.php?id='+wishlist+'&reveal=all'
print url
json = urllib2.urlopen(url).read()
pp.pprint(items)
items += eval(json)
print 'WISHLIST PAGE'
print page
print 'TOTAL ITEMS'
print len(items)
print 'GETTING ISBNs'
for idx, item in enumerate(items):
try:
#item['isbn'] = item['link'].split("/")[5].strip("\\")
item['isbn'] = item['link'].split("/")[2].strip("\\")
items[idx] = item
except Exception, err:
print err
print 'FIRST ISBN WAS'
print items[0]['isbn']
success = []
fail = []
opener = urllib2.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11')]
result = None
#do as enumerable
for item in items:
search = catalog + item['isbn']
result = opener.open(search).read()
page = fromstring(result)
page.make_links_absolute(search)
#if you are adapting to a different catalog, you will need to change this selector. it should match if the page returns a book. it shouldn't match if your library doesn't have the book.
link = page.cssselect('#detail_main_wrapper0')
#TODO add search to item dict
if len(link) > 0:
success.append(item)
print 'success - ', item['name'] ,' - ', item['isbn'],' - ', search
else :
fail.append(item)
print 'fail - ', item['name']
print 'SUCCESS'
for idx, item in enumerate(success):
search = catalog + item['isbn']
print idx, ' - ', item['name'], ' - ',item['isbn'], ' - ', search
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment