@janinge
Last active December 29, 2015
INFO207 spaghetti.
#!/usr/bin/env python
EBAY_APP_ID = ''
EBAY_DEV_ID = ''
EBAY_CER_ID = ''
EBAY_USER_TOKEN = ''
EBAY_CATEGORIES = [
    31388,  # Digital Cameras
    11724,  # Camcorders
    171485, # iPads, Tablets and eBook readers
    73839,  # iPods and MP3 players
    11071,  # Televisions
    112529, # Headphones
    139973, # Video Games
    156955, # GPS units
    888     # Sporting goods
]
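# NB: the four credential strings above must be filled in with keys from an
# eBay developer account before anything below will talk to the API; the
# numbers are eBay's own numeric category ids.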
from ebaysdk import finding, shopping, trading
from datetime import datetime
from dateutil.parser import parse as dateparse
from collections import namedtuple, Counter
from Queue import Queue
from threading import Thread
import logging as log
log.basicConfig(format='%(levelname)s:%(funcName)s:%(lineno)d - %(message)s', level=log.DEBUG)
from urllib2 import urlopen
# ebaysdk uses Soup for XML parsing
# I like soup too, :%s/re/bs4/g
try:
    from bs4 import BeautifulSoup
except ImportError:
    from BeautifulSoup import BeautifulSoup
finder = finding(appid=EBAY_APP_ID)
shopper = shopping(appid=EBAY_APP_ID, devid=EBAY_DEV_ID, certid=EBAY_CER_ID)
trader = trading(appid=EBAY_APP_ID, devid=EBAY_DEV_ID, certid=EBAY_CER_ID, token=EBAY_USER_TOKEN)
Auction = namedtuple('Auction', ('id', 'title', 'bids', 'price', 'shipping', 'sold', 'endtime'))
Bids = namedtuple('Bids', ('sniped', 'time_left', 'bidders', 'winner_bids', 'winner_new', 'new_ratio'))
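# The two tuples above: Auction captures one completed listing; Bids captures
# the winner-level summary computed for it (sniped flag, seconds between the
# winning bid and the end time, number of bidders, the winner's bid count,
# whether the winner is a new eBay user, and the share of bidders that are new).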
def depaginator(api, call, input, output='searchResult', page=1, max_page=100, retries=3):
    while page <= max_page and retries:
        log.info("Retrieving page %i", page)
        try:
            api.execute(call, api._to_xml({'paginationInput': {'pageNumber': page}}) + api._to_xml(input))
        except:
            log.exception("Paginator failed in API call")
            break
        result = api.response_dict()
        if result.get('ack', {}).get('value') != 'Success':
            retries -= 1
            log.warning("Unsuccessful %s when requesting page %i of %i. Retries left: %i. Error: %s",
                        call, page, max_page, retries, result.get('errors'))
            continue
        items = result.get(output, {}).get('item')
        if items:
            yield items
            page += 1
        else:
            break

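# A hypothetical use of the paginator, iterating completed listings for one
# category (the call name and the `finder` handle come from this script):
#
#   for page in depaginator(finder, 'findCompletedItems',
#                           {'categoryId': 31388}, max_page=5):
#       for item in page:
#           print item.get('title', {}).get('value')
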
def find_candidate_products_in_category(category, target=40, **pages):
    products = Counter()
    for page in depaginator(finder, 'findCompletedItems', {
            'categoryId': category,
            'sortOrder': 'BidCountMost',
            'itemFilter': [{'name': 'ListingType', 'value': 'Auction'},
                           {'name': 'Condition', 'value': 'New'}]
            }, **pages):
        for item in page:
            pid = item.get('productId')
            if pid and pid.get('type', {}).get('value') == 'ReferenceID':
                products[int(pid.get('value', 0))] += 1
        # Stop once one product has `target` hits, or once this page (sorted
        # by bid count) has dropped below two bids.
        if max(products.values() or (0,)) >= target or \
           int(page[-1].get('sellingStatus', {}).get('bidCount', {}).get('value', 0)) < 2:
            break
    return products

def find_auctions_from_product_id(category, product_id, id_type='ReferenceID', **pages):
    auctions = set()
    for page in depaginator(finder, 'findCompletedItems', finder._to_xml({
            'categoryId': category,
            'sortOrder': 'BidCountMost',
            'itemFilter': [{'name': 'ListingType', 'value': 'Auction'},
                           {'name': 'Condition', 'value': 'New'}]
            }) + '<productId type="%s">%s</productId>' % (id_type, product_id), **pages):
        for item in page:
            ss = item.get('sellingStatus', {})
            listing = Auction(
                id=int(item.get('itemId', {}).get('value', 0)) or None,
                title=item.get('title', {}).get('value'),
                bids=int(ss.get('bidCount', {}).get('value', 0)),
                price=float(ss.get('convertedCurrentPrice', {}).get('value', 0)) or None,
                shipping=float(item.get('shippingInfo', {}).get('shippingServiceCost', {}).get('value', 0)),
                sold=ss.get('sellingState', {}).get('value') == 'EndedWithSales',
                endtime=dateparse(item.get('listingInfo', {}).get('endTime', {}).get('value')))
            if listing.bids < 2:
                return auctions
            auctions.add(listing)
    return auctions

def find_samples_from_category_list(categories, min_auctions=10, max_samples=3, **pages):
    targets = {}
    for category in categories:
        log.info("Collecting products from category %i", category)
        products = find_candidate_products_in_category(category, **pages).most_common(max_samples)
        targets[category] = [x[0] for x in products if x[1] > min_auctions]
    return targets

def get_bid_summary_from_auction(item, endtime, bidlimit=1, timelimit=30.0, newbidcounts=[], oldbidcounts=[]):
    # NB: the mutable default lists accumulate per-item bid counts across
    # calls, so repeated invocations keep extending the same two lists.
    trader.execute('GetAllBidders', {'CallMode': 'ViewAll', 'IncludeBiddingSummary': 'true', 'ItemID': item})
    bidders = trader.response_dict().get('BidArray', {}).get('Offer', [])
    winner = bidders[0]
    timebid = dateparse(winner.get('TimeBid', {}).get('value', ''))
    bidcount = int(winner.get('BidCount', {}).get('value', 1))
    timeleft = (endtime - timebid).total_seconds()
    # Sniped: the winning bid landed within `timelimit` seconds of the end,
    # and the winner placed no more than `bidlimit` bids in total.
    sniped = timeleft < timelimit and bidcount <= bidlimit
    isnew = lambda bidder: bidder.get('User', {}).get('NewUser', {}).get('value') == 'true'
    newuser = isnew(winner)
    newratio = sum(isnew(bidder) for bidder in bidders) / float(len(bidders))
    for bidder in bidders:
        # File each bidder's per-item bid counts by whether that bidder is new.
        bidcounter = newbidcounts if isnew(bidder) else oldbidcounts
        for auction in bidder.get('User', {}).get('BiddingSummary', {}).get('ItemBidDetails', []):
            try:
                bidcounter.append(int(auction.get('BidCount').get('value')))
            except AttributeError:
                continue
    return Bids(sniped, timeleft, len(bidders), bidcount, newuser, newratio)

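# A hypothetical call (item id and end time made up for illustration):
#
#   get_bid_summary_from_auction(123456789012,
#                                dateparse('2013-11-20 18:00:00'))
#
# returns a Bids tuple whose `sniped` flag applies the thresholds above.
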
def scrape_snipers(item, endtime=None, bidlimit=1, timelimit=30.0):
    soup = BeautifulSoup(urlopen("http://offer.ebay.com/ws/eBayISAPI.dll?ViewBids&item=%i" % item, timeout=20).read())
    # Brute force date parser!
    for sib in soup.find("span", text="Time Ended:").next_siblings:
        if sib.text.strip():
            try:
                endtime = dateparse(sib.text)
            except ValueError:
                continue
            else:
                break
    bids = []
    # How to build a house of cards in 10 easy steps
    for row in soup.find(text="Starting Price").find_parent("table").find_all("tr", bgcolor=True):
        a = row.find("a")
        if not a:
            continue  # Drop proxy bids
        c = a.parent.next_sibling
        aid = a.find(text=True, recursive=False)
        bid = c.text.strip()
        time = ' '.join(c.next_sibling.stripped_strings)
        bids.append((aid, bid, dateparse(time)))
    if not bids:
        return
    winner = bids[0][0]
    if (endtime - bids[0][2]).total_seconds() < timelimit:
        if len(filter(lambda x: x[0] == winner, bids)) <= bidlimit:
            return True
    return False

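# scrape_snipers is a screen-scraping fallback for the same sniper test as
# get_bid_summary_from_auction: it reads eBay's public ViewBids page instead
# of the Trading API, and returns True/False (or None when no bids parse)
# rather than a full Bids tuple.
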
def classifier_thread(queue, results):
    while True:
        work = queue.get()
        if not work:
            return  # a None entry is the shutdown sentinel
        try:
            results[work[1]] = work[0](*work[1:])
        except:
            log.exception("Bad things happened while classifying %s", work[1])
            results[work[1]] = None
        queue.task_done()

def launch_worker_pool(*args, **kwargs):
    work_queue = Queue()
    threads = [Thread(target=kwargs.get('task', classifier_thread), args=(work_queue,) + args)
               for _ in range(kwargs.get('workers', 4))]
    [t.start() for t in threads]
    return (work_queue, threads)

def join_worker_pool(queue, threads):
    queue.join()
    [queue.put(None) for t in threads]
    [t.join() for t in threads]

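# Pool protocol (a sketch): each queue entry is (callable, arg1, ...); the
# worker stores callable(arg1, ...) in `results` under arg1. Hypothetical use:
#
#   results = {}
#   queue, threads = launch_worker_pool(results, workers=4)
#   queue.put((scrape_snipers, 123456789012))  # made-up item id
#   join_worker_pool(queue, threads)
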
def store_auctions_dictionary():
    auctions = {}
    categories = find_samples_from_category_list(EBAY_CATEGORIES, max_page=40)
    for category in EBAY_CATEGORIES:
        auctions[category] = {}
        for product in categories[category]:
            auctions[category][product] = find_auctions_from_product_id(category, product)
    store_obj('auctions', auctions)

def store_obj(obj, data):
    import pickle
    with open(obj + '.python', 'wb') as fp:
        pickle.dump(data, fp)

def load_obj(obj):
    import pickle
    with open(obj + '.python', 'rb') as fp:
        return pickle.load(fp)

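# Pickles land in the working directory as '<name>.python', so a call like
# load_obj('auctions') presumes an earlier store_obj('auctions', ...) run.
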
def update_bids_from_selection(auctions, bids):
    for auction in auctions:
        if auction.id in bids:
            continue
        try:
            summary = get_bid_summary_from_auction(auction.id, auction.endtime)
        except:
            log.exception("Calculating summary failed")
            continue
        bids[auction.id] = summary

def plot_ticks(auctions, bids, title=None, currency='USD'):
    import matplotlib.pyplot as plt
    sniped = [(a.endtime, a.price + a.shipping) for a in auctions if a.id in bids and bids[a.id].sniped]
    unsniped = [(a.endtime, a.price + a.shipping) for a in auctions if a.id in bids and not bids[a.id].sniped]
    if sniped:  # plot_date chokes on an empty series
        plt.plot_date(*zip(*sniped), color='red')
    if unsniped:
        plt.plot_date(*zip(*unsniped), color='green')
    plt.title(title)
    plt.ylabel(currency)
    plt.grid(True)
    #plt.show()
    plt.savefig(title + ".png")

def plot_box(auctions, bids, title=None, currency='USD'):
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.setp(ax, xticklabels=["Normal", "Sniped"])
    sniped = []
    unsniped = []
    for auction in auctions:
        if auction.id in bids:
            (sniped if bids[auction.id].sniped else unsniped).append(auction.price + auction.shipping)
    ax.boxplot([unsniped, sniped])
    plt.ylabel(currency)
    plt.grid(True)
    #plt.show()
    plt.savefig(title + ".png")

def main():
    # (Unused) snapshot of sampled product ids per category.
    sample_targets = {888: [141695192, 143330669, 99318098],
                      11724: [129664853, 129678208],
                      31388: [100127676, 100113265, 170243795],
                      73839: [154044229, 92295415, 118461770],
                      112529: [114567862, 114624173, 114561288],
                      139973: [153180066, 153175722, 77244068],
                      156955: [109402844, 115377395, 109369394],
                      171485: [117365730, 166792164, 117332436]}
    auctions = load_obj('auctions')
    bids = load_obj('bids')
    for group in auctions:
        for product_id, product in auctions[group].iteritems():
            auction_selection = list(product)
            print "Resolving", len(auction_selection), "auctions"
            update_bids_from_selection(auction_selection, bids)
            store_obj('bids', bids)
            plot_ticks(auction_selection, bids, title=str(product_id))
            plot_box(auction_selection, bids, title=str(product_id))

if __name__ == "__main__":
    main()
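# Intended workflow (a sketch, assuming the credentials at the top are set):
#   store_auctions_dictionary()   # phase 1: harvest completed auctions
#   store_obj('bids', {})         # seed the bids store that main() expects
#   main()                        # phase 2: resolve bid summaries and plot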
@JahnMic3

Hi, I'm new to GitHub. Does this code find all bidders and bids for a set of past auctions (defined by keywords) on eBay using the eBay developers API? Thank you very much!
