Skip to content

Instantly share code, notes, and snippets.

@ValdikSS
Created October 15, 2015 15:54
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save ValdikSS/5801c0c8a0e279d33466 to your computer and use it in GitHub Desktop.
Save ValdikSS/5801c0c8a0e279d33466 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import requests
import re
from pyquery import PyQuery as pq
class Adtuple(tuple):
    """A ``(title, price)`` pair describing a single listing.

    Behaves exactly like a plain tuple; only the string form is customised
    so that printing an ad shows the price (index 1) before the title
    (index 0).
    """

    def __str__(self):
        """Return ``"<price> <title>"`` for display."""
        return "{} {}".format(self[1], self[0])
# Listing page to scrape (avito.ru; the path suggests processors on sale in
# Saint Petersburg -- inferred from the URL only).
LISTING_URL = "https://www.avito.ru/sankt-peterburg/tovary_dlya_kompyutera/komplektuyuschie/protsessory"

# Handing PyQuery a URL makes it download and parse that page.
tree = pq(LISTING_URL)

# One "div.description" element is expected per listing on the page.
posts = tree("div.description")
def find_good(adlist):
    """Return the ads whose title mentions a wanted CPU model number.

    A title matches when it contains, in order: the letter ``i`` (either
    case), any number of spaces or hyphens, a ``5`` or ``7``, exactly one
    space or hyphen, a ``3`` or ``4``, and three more digits -- for example
    ``"i5-3570"`` or ``"I7 4770"``. Presumably this targets Intel Core
    i5/i7 3xxx and 4xxx parts.

    Args:
        adlist: Iterable of indexable records whose item ``0`` is the
            title string (e.g. ``Adtuple`` instances).

    Returns:
        A new list with the matching records, in their original order.
    """
    # Raw string: "\-" inside a normal literal is an invalid escape sequence
    # and triggers a warning on current Python versions. Compiled once so
    # the loop only performs the search.
    model_pattern = re.compile(r"[iI][ \-]*[57][ \-][34][0-9]{3}")
    return [ad for ad in adlist if model_pattern.search(ad[0])]
# Build one (title, price) record for every listing element found on the page.
ads = []
for post in posts:
    # Wrap the raw element once and reuse the wrapper for both lookups.
    item = pq(post)
    title = item('h3.title>a').text().strip()
    price = item('div.about').text().strip()
    ads.append(Adtuple((title, price)))

# Print only the listings that pass the CPU model filter, one per line.
for good_ad in find_good(ads):
    print(good_ad)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment