Created
April 3, 2017 10:49
-
-
Save Th3redTea/0c6b2671cdc8145441e1b700e3a3ceac to your computer and use it in GitHub Desktop.
Start of the Avito scraper.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
from bs4 import BeautifulSoup | |
from urllib.error import HTTPError | |
import requests | |
def creat_links():
    """Fetch the Avito laptops listing page and collect every deal link.

    Populates the module-level ``nameList`` (listing container tags) and
    ``links`` (listing URLs), which ``items_info`` consumes afterwards.
    On any network/HTTP failure the error is printed and ``links`` stays
    empty, so the rest of the script degrades gracefully.
    """
    global nameList, links
    links = []
    # Pre-initialize so the loop below cannot hit a NameError when the
    # request fails (the original left nameList undefined in that case).
    nameList = []
    try:
        r = requests.get('https://www.avito.ma/fr/marrakech/ordinateurs_portables-%C3%A0_vendre')
        # requests does not raise on 4xx/5xx by itself — surface those too.
        r.raise_for_status()
        bsObj = BeautifulSoup(r.content, 'lxml')
        nameList = bsObj.find_all("div", {"class": "ctext3 fs12"})
    except requests.RequestException as e:
        # requests raises its own exception hierarchy; urllib's HTTPError
        # (which the original caught) is never raised by requests.get.
        print(e)
    for container in nameList:
        for item in container.find_all("h2", {"class": "fs14"}):
            links.append(item.a.get("href"))
# | |
def items_info():
    """Visit every collected listing URL and print its description text.

    Reads the module-level ``links`` list built by ``creat_links``; for
    each page it also extracts the title and price (kept in locals) and
    prints each description block found.
    """
    for url in links:
        page = requests.get(str(url))
        soup = BeautifulSoup(page.content, 'lxml')
        # Listing title (last match wins if several headings are present).
        for heading in soup.find_all('h1', {"class": "page-header mbm"}):
            titre = str(heading.string)
        # Listing price, taken from the first child of the amount span.
        for amount in soup.find_all('span', {'class': "amount value"}):
            price = str(amount.contents[0])
        # Listing description — the only value actually printed.
        for block in soup.find_all('div', {'class': "span10"}):
            description = str(block.string)
            print(description)
# Run the scrape only when executed as a script, not on import.
if __name__ == "__main__":
    creat_links()
    items_info()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment