Skip to content

Instantly share code, notes, and snippets.



Last active Aug 29, 2015
What would you like to do?
#!/usr/bin/env python
# coding=utf-8
# pip install requests beautifulsoup4
# TODO: handle category selection (currently broken server-side)
# TODO: fix the occasional wrong price detection
from __future__ import division
import re
import datetime
import argparse
import itertools
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import requests
from bs4 import BeautifulSoup
# Number of ads requested per result page (sent as the `i=` query parameter).
# NOTE(review): this definition is missing from the visible copy of the file;
# 50 is inferred from the "10 pages ~ 500 items" remark in search() -- confirm.
PER_PAGE = 50

# Site root that category paths and ad links are appended to.
# NOTE(review): also missing from the visible copy; the value below is a guess
# based on the category paths and CSS class names -- confirm before relying on it.
URL_BASE = 'http://hudebnibazar.cz'

# Query-string template; `page`, `term` and `ad_type` are filled in per request
# with str.format(), the page size is baked in here.
URL_SEARCH = '?is=1&p={page}&f={term}&n={ad_type}&r=&i=%d&o=datum' % PER_PAGE

# Category key -> URL path of that category on the site.
URL_CATEGORIES = {
    'guitarother': '/kytarova-pouzdra-a-prislusenstvi/110600/',
    'guitarfx': '/kytarove-efekty/110500/',
    'any': '/vsechny-kategorie/0/',
}

# Ad-type key -> value sent as the `n=` query parameter.
AD_TYPES = dict(sell='nabidka', buy='poptavka', other='ruzne')

# Czech month names in the genitive case, January through December.
MONTHS = (
    u'ledna', u'února', u'března', u'dubna', u'května', u'června',
    u'července', u'srpna', u'září', u'října', u'listopadu', u'prosince',
)

# Month name -> month number (1-12).
MONTHS_MAP = dict(zip(MONTHS, range(1, 13)))
def parse_date(date_str, today=None):
    """Work out the posting date of an ad from its contact/location text.

    The text either contains the word ``dnes`` ("today"), an explicit
    ``<day>. <month name>`` date without a year, or neither (in which case
    the ad is treated as posted yesterday).

    :param date_str: text scraped from the ad's contact block.
    :param today: reference date; defaults to ``datetime.date.today()``.
    :returns: a ``datetime.date`` that is never later than ``today``.
    """
    if today is None:
        today = datetime.date.today()
    # Used when no explicit, valid date can be extracted from the text.
    fallback = today if 'dnes' in date_str else today - datetime.timedelta(days=1)

    found = re.search(r'(\d+)\.\s+(\w+)', date_str, re.UNICODE)
    if not found:
        return fallback
    day, month = found.groups()
    month_num = MONTHS_MAP.get(month)
    if month_num is None:
        # "<number>. <word>" matched something that is not a month name.
        return fallback

    # The site prints no year: take the most recent occurrence of that
    # day/month that is not in the future.  (Comparing months alone would
    # push ads from earlier in the current month a whole year back.)
    for year in (today.year, today.year - 1):
        try:
            candidate = datetime.date(year, month_num, int(day))
        except ValueError:
            # Impossible date in this year, e.g. 29 February.
            continue
        if candidate <= today:
            return candidate
    return fallback
# Amount in Czech crowns: digits optionally grouped by whitespace
# ("1 600 Kč"), optionally followed by a decimal comma part.
_PRICE_CZK_RE = re.compile(u'(\\d[\\d\\s]*(?:,\\d+)?)\\s*Kč', re.UNICODE)


def _parse_price_czk(price):
    """Return the whole-crown amount found in *price*, or None if there is none."""
    found = _PRICE_CZK_RE.search(price)
    if not found:
        return None
    # Drop thousands separators (spaces / non-breaking spaces) before parsing.
    digits = ''.join(found.group(1).split())
    return int(float(digits.replace(',', '.')))


def parse_doc(resp, days, max_price, **kwargs):
    """Yield the ads found on one search-result page.

    NOTE(review): the visible copy of this function had its ``.select(...)``
    / ``re.search(...)`` calls and ``continue`` statements stripped; they are
    reconstructed here from the surrounding code -- confirm against the
    original script.

    :param resp: HTTP response object; only its ``content`` is read.
    :param days: if truthy, skip ads older than this many days.
    :param max_price: if truthy, skip ads with no CZK price or a higher one.
    :param kwargs: accepted and ignored (the caller forwards all CLI options).
    :returns: generator of ``(title, link, text, img, price, loc_data)``
        tuples; ``img`` is None when the ad has no picture link.
    """
    doc = BeautifulSoup(resp.content)
    today = datetime.date.today()
    for one in doc.select('td.InzeratBody'):
        contact = one.select('div.InzeratKontakt')
        loc_data = contact[0].text if contact else ''
        if days and (today - parse_date(loc_data)).days > days:
            continue

        headings = one.select('div.InzeratNadpisSmall')
        texts = one.select('div.InzeratTextSmall')
        anchor = headings[0].find('a') if headings else None
        if anchor is None or not texts:
            # Not a complete ad cell; skip it rather than abort the whole run.
            continue
        title = headings[0].text
        link = URL_BASE + anchor['href']
        text = texts[0].text

        pictures = one.select('a.fancybox')
        img = pictures[0]['href'] if pictures else None

        price_cells = one.select('div.InzeratCenaSmall')
        price = price_cells[0].text if price_cells else '?'
        price_czk = _parse_price_czk(price)
        # With a price cap, ads without a recognisable CZK price are dropped too.
        if max_price and (price_czk is None or price_czk > max_price):
            continue

        shown_price = u'%d Kč' % price_czk if price_czk is not None else price
        yield title, link, text, img, shown_price, loc_data
def search(term, category='any', ad_type='sell', days=0, max_price=None, **kwargs):
    """Search the site for *term* and return an iterator over matching ads.

    Fetches the first result page, reads the total hit count from it and then
    fetches the remaining pages, up to 10 pages in total.

    NOTE(review): the ``re.search(`` call, the ``requests.codes.ok``
    comparison and the ``break`` / ``docs.append(...)`` lines were stripped
    from the visible copy and are reconstructed here -- confirm.

    :param term: search phrase, inserted into the query string as given.
    :param category: key of ``URL_CATEGORIES``.
    :param ad_type: key of ``AD_TYPES``.
    :param days: forwarded to ``parse_doc``.
    :param max_price: forwarded to ``parse_doc``.
    :param kwargs: forwarded to ``parse_doc``.
    :returns: lazy iterator of the tuples yielded by ``parse_doc``.
    :raises KeyError: for an unknown ``category`` or ``ad_type``.
    """
    base_url = URL_BASE + URL_CATEGORIES[category]
    ad_type_param = AD_TYPES[ad_type]

    def page_url(page):
        return base_url + URL_SEARCH.format(term=term, ad_type=ad_type_param, page=page)

    docs = [requests.get(page_url(1))]

    # The first page states the total number of hits as "Celkem <n>".
    # When it is absent (no hits, error page) only the first page is parsed.
    found = re.search(br'Celkem\s(\d+)', docs[0].content)
    hits = int(found.group(1)) if found else 0
    # Ceiling division, so an exact multiple of PER_PAGE does not request
    # one extra, empty page.
    pages = max(1, (hits + PER_PAGE - 1) // PER_PAGE)
    pages = min(pages, 10)  # 500 items should be enough

    for page in range(2, pages + 1):
        next_ = requests.get(page_url(page))
        if next_.status_code != requests.codes.ok:
            break
        docs.append(next_)

    return itertools.chain.from_iterable(
        parse_doc(doc, days=days, max_price=max_price, **kwargs) for doc in docs)
def mail(subj, mess, to_, from_=''):
    """Send *mess* as a UTF-8 plain-text e-mail through the SMTP server on localhost.

    :param subj: subject line.
    :param mess: message body.
    :param to_: recipient address.
    :param from_: sender address, used for both the envelope and the header.
    """
    msg = MIMEMultipart('alternative')
    msg['Subject'] = subj
    msg['From'] = from_
    msg['To'] = to_
    # The text part has to be attached, otherwise the mail goes out empty.
    msg.attach(MIMEText(mess, 'plain', 'utf-8'))

    s = smtplib.SMTP('localhost')
    try:
        s.sendmail(from_, [to_], msg.as_string().encode('ascii'))
    finally:
        # Always end the SMTP session, even when sending failed.
        s.quit()
if __name__ == '__main__':
    # Command-line entry point: search every given term, print the hits and
    # optionally mail them.
    parser = argparse.ArgumentParser()
    parser.add_argument('terms', nargs="+", help='search terms')
    # parser.add_argument('-c', '--category', default='any', choices=URL_CATEGORIES.keys(),
    #                     help='category to search (defaults to all)')
    # parser.add_argument('-t', '--ad_type', default='sell', choices=AD_TYPES.keys(), help='ad type')
    parser.add_argument('-p', '--max_price', type=int, default=0, help='max price')
    parser.add_argument('-m', '--mail', default=False, help='mail to',)
    parser.add_argument('-s', '--short', action='store_true', default=False, help='short listing',)
    parser.add_argument('-d', '--days', type=int, default=0, help='just ads within [days]')
    args = parser.parse_args()

    # Collect the pieces and join once instead of growing a string in a loop.
    chunks = []
    for term in args.terms:
        # All parsed options are forwarded; search() ignores the ones it does not know.
        for title, link, text, img, price, loc_data in search(term, **vars(args)):
            chunks.append(' '.join((title, price, loc_data)))
            if not args.short:
                chunks.append('\n%s' % text)
            chunks.append('\n%s\n\n' % ' '.join((link, img if img else '')))
    body = ''.join(chunks)

    if body:
        if args.mail:
            subj = u'bazarbot "%s"' % ' OR '.join(args.terms)
            subj = subj if not args.max_price else subj + (u' < %d Kč' % args.max_price)
            mail(subj, body, to_=args.mail)
        # NOTE(review): indentation was lost in the visible copy; printing
        # whenever there are hits (mailed or not) is assumed -- confirm.
        print(body)

This comment has been minimized.

Copy link
Owner Author

@starenka starenka commented Mar 1, 2015 'dunlop mxr' 'boss' 'whammy' -p1600 -d1 # mail me any dunlop/boss/whammy hits cheaper than 1600 CZK added yesterday (put this into your crontab) 'fender jazzmaster' # show me any ads selling a jazzmaster

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.