Skip to content

Instantly share code, notes, and snippets.

@allatambov
Last active March 22, 2022 15:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allatambov/304182891a67a8fe93905ef542f25e3a to your computer and use it in GitHub Desktop.
Save allatambov/304182891a67a8fe93905ef542f25e3a to your computer and use it in GitHub Desktop.
from selenium import webdriver as wd
from selenium.webdriver.common.by import By

# Start Chrome through a local chromedriver binary (macOS path).
# NOTE(review): passing the driver path positionally is only accepted by older
# Selenium releases; newer ones expect a Service object -- confirm against the
# installed version before upgrading.
br = wd.Chrome("/Users/allat/Desktop/chromedriver")
# Windows alternative:
# br = wd.Chrome("C://Users/allat/Desktop/chromedriver.exe")
br.get("http://www.biblio-globus.ru/")

# Type the query into the search box and submit it.
# find_element(By.ID, ...) is used because the find_element_by_* helpers were
# removed in Selenium 4.3; this spelling works on both Selenium 3 and 4.
search = br.find_element(By.ID, "search_string")
search.clear()
search.send_keys("Python")
button = br.find_element(By.ID, "search_submit")
button.click()

# Keep the HTML of the page the browser is showing after the search.
html = br.page_source
from bs4 import BeautifulSoup

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results could differ between machines.
soup = BeautifulSoup(html, "html.parser")

# Every search result is looked up as a <div class="details_1"> element.
divs = soup.find_all("div", {"class": "details_1"})

# Worked example on the first result only; divs[0] raises IndexError when the
# search returned no results.
d = divs[0]
author = d.find("div", {"class": "author"}).text
title = d.find("a", {"class": "name"}).text
place = d.find("div", {"class": "placement"}).text
price_str = d.find("div", {"class": "title_data price"}).text
# Second whitespace-separated token, cut at the first comma
# (presumably "<label> <amount>,<fraction> ..." -- verify against the page).
price = price_str.split()[1].split(",")[0]
def _node_text(node):
    """Return ``node.text``, or ``None`` when the lookup found no element."""
    return None if node is None else node.text


def get_book(d):
    """Extract one book's fields from a search-result element.

    Args:
        d: An element exposing a BeautifulSoup-style ``find(name, attrs)``
            method (callers pass the ``div.details_1`` tags).

    Returns:
        A ``(title, author, price, place)`` tuple. Each field is the text of
        the matching child element, or ``None`` when that element is absent.
        ``price`` is the second whitespace-separated token of the price text,
        cut at the first comma; it is ``None`` when the price element is
        missing or its text has fewer than two tokens.
    """
    author = _node_text(d.find("div", {"class": "author"}))
    title = _node_text(d.find("a", {"class": "name"}))
    place = _node_text(d.find("div", {"class": "placement"}))
    price_str = _node_text(d.find("div", {"class": "title_data price"}))

    # A missing or oddly formatted price must not abort the whole scrape,
    # so fall back to None instead of raising.
    tokens = price_str.split() if price_str is not None else []
    price = tokens[1].split(",")[0] if len(tokens) > 1 else None

    return title, author, price, place
# Parse every result element found on the first results page.
books = [get_book(book) for book in divs]
# Imported here so this step does not depend on imports made elsewhere.
import re

from selenium.webdriver.common.by import By

# The "Последняя" link's href carries the number of the final results page.
# find_element(By.LINK_TEXT, ...) is used because find_element_by_link_text
# was removed in Selenium 4.3.
last = br.find_element(By.LINK_TEXT, "Последняя")
last_href = last.get_attribute("href")

# Nothing below uses the browser, so release it instead of leaving the
# Chrome/chromedriver processes running.
br.quit()

# Take only the digits after "page=" so trailing parameters do not break int().
page_match = re.search(r"page=(\d+)", last_href or "")
if page_match is None:
    raise ValueError(f"no page number in last-page link: {last_href!r}")
last_page = int(page_match.group(1))

# One search-results URL per page, 1..last_page inclusive.
all_pages = [
    f"http://www.biblio-globus.ru/search/catalog/products?query=Python&page={i}"
    for i in range(1, last_page + 1)
]
import requests
from time import sleep

# Rows of (title, author, price, place) gathered from every results page.
info = []
for link in all_pages:
    # Without a timeout a stalled connection would block the script forever.
    page = requests.get(link, timeout=30)
    # Explicit parser: keeps parsing identical regardless of which optional
    # parsers are installed.
    soup = BeautifulSoup(page.text, "html.parser")
    divs = soup.find_all("div", {"class": "details_1"})
    books = [get_book(book) for book in divs]
    info.extend(books)
    sleep(0.5)  # pause between requests
    print(link)  # progress output: the page just processed
import pandas as pd

# Pass the column names to the constructor: assigning four names afterwards
# raises ValueError when `info` is empty, because the frame then has no columns.
dat_books = pd.DataFrame(info, columns=["title", "author", "price", "place"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment