Skip to content

Instantly share code, notes, and snippets.

@Th3redTea
Created April 3, 2017 10:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Th3redTea/0c6b2671cdc8145441e1b700e3a3ceac to your computer and use it in GitHub Desktop.
Save Th3redTea/0c6b2671cdc8145441e1b700e3a3ceac to your computer and use it in GitHub Desktop.
Start of the Avito scraper
#!/usr/bin/python3
from bs4 import BeautifulSoup
from urllib.error import HTTPError
import requests
def creat_links():
    """Collect the link of every listing found on the Avito search page.

    Downloads the Marrakech "laptops for sale" results page, looks up every
    ``div`` whose class is ``ctext3 fs12`` and, inside each of them, every
    ``h2`` whose class is ``fs14``, then stores the ``href`` of the heading's
    anchor.

    Results are published through two module-level globals:

    * ``nameList`` -- the matched ``div`` tags.
    * ``links`` -- the collected ``href`` values.

    Both are reset to empty lists at the start of every call, so they are
    always defined afterwards, even when the download fails. A failed
    download is reported with ``print`` and leaves both lists empty.
    """
    global nameList, links
    links = []
    # Bind nameList before the request so that nothing downstream hits a
    # NameError when the download fails.
    nameList = []
    try:
        r = requests.get(
            'https://www.avito.ma/fr/marrakech/ordinateurs_portables-%C3%A0_vendre',
            timeout=30,
        )
        # requests does not raise on 4xx/5xx responses unless asked to.
        r.raise_for_status()
    except requests.RequestException as e:
        # requests signals failures with its own exception hierarchy, not
        # with urllib.error.HTTPError.
        print(e)
        return

    bsObj = BeautifulSoup(r.content, 'lxml')
    nameList = bsObj.find_all("div", {"class": "ctext3 fs12"})
    for naem in nameList:
        for item in naem.find_all("h2", {"class": "fs14"}):
            anchor = item.a
            # A heading without an <a> child, or an anchor without an href,
            # carries no usable link; skip it instead of storing None.
            link_of_pages = anchor.get("href") if anchor is not None else None
            if link_of_pages:
                links.append(link_of_pages)
#
def items_info():
    """Visit every collected listing page and print its description blocks.

    Reads the module-level ``links`` list. For each link the page is
    downloaded and parsed, and three things are extracted:

    * the title: text of the ``h1`` whose class is ``page-header mbm``;
    * the price: first child of the ``span`` whose class is ``amount value``;
    * the descriptions: ``.string`` of every ``div`` whose class is
      ``span10``; each one is printed as it is found.

    When several title or price tags match, the last one wins. A link that
    cannot be downloaded is reported with ``print`` and skipped, so one bad
    listing does not abort the rest of the crawl.

    Returns:
        A list with one dict per successfully fetched page, with the keys
        ``"link"``, ``"title"``, ``"price"`` and ``"descriptions"``.
        ``"title"`` and ``"price"`` are ``None`` when no matching tag exists.
    """
    records = []
    for link in links:
        try:
            r = requests.get(str(link), timeout=30)
            # requests does not raise on 4xx/5xx responses unless asked to.
            r.raise_for_status()
        except requests.RequestException as e:
            print(e)
            continue
        bsObj = BeautifulSoup(r.content, 'lxml')

        titre = None
        for heading in bsObj.find_all('h1', {"class": "page-header mbm"}):
            titre = str(heading.string)

        price = None
        for p in bsObj.find_all('span', {'class': "amount value"}):
            # An empty <span> has no children; indexing it would raise.
            if p.contents:
                price = str(p.contents[0])

        descriptions = []
        for d in bsObj.find_all('div', {'class': "span10"}):
            # NOTE(review): per the BeautifulSoup docs, .string is None for a
            # tag with more than one child, which prints "None" here; kept
            # as-is to preserve the existing output -- confirm whether
            # get_text() is wanted instead.
            description = str(d.string)
            print(description)
            descriptions.append(description)

        records.append({
            "link": link,
            "title": titre,
            "price": price,
            "descriptions": descriptions,
        })
    return records
# Script entry point: gather the listing links, then visit each of them.
# The order matters: items_info() iterates the global `links` list that
# creat_links() creates and fills.
creat_links()
items_info()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment