Skip to content

Instantly share code, notes, and snippets.

@realhackcraft
Created January 5, 2023 19:21
Show Gist options
  • Save realhackcraft/8b9a301e6beb5491e8a63ba9bcdd53d8 to your computer and use it in GitHub Desktop.
Save realhackcraft/8b9a301e6beb5491e8a63ba9bcdd53d8 to your computer and use it in GitHub Desktop.
Python web scraper using ChatGPT
import os
import csv
import inspect
import random
import requests
from colorama import Fore
from bs4 import BeautifulSoup
# Accumulator for scraped (title, price, link) tuples; filled by scrap_books().
data = []
# Root URL of the site being scraped; page paths are appended to it.
base_url = 'http://books.toscrape.com'
def scrap_books():
    """Scrape every book from books.toscrape.com into ``data`` and books.csv.

    Walks the paginated catalogue, collecting a (title, price, link) tuple
    per book into the module-level ``data`` list, writes the rows to
    ``books.csv`` in the working directory, and prints a colored summary.

    Raises:
        requests.HTTPError: if any catalogue page returns an error status.
    """
    data.clear()  # avoid duplicated rows if the function is called twice
    page_number = 1
    scraping = True
    while scraping:
        r = requests.get(base_url + "/catalogue/page-{}.html".format(page_number))
        r.raise_for_status()  # fail loudly instead of parsing an error page
        # The site serves UTF-8 but omits the charset header, so requests
        # guesses Latin-1 and '£' decodes as 'Â£'. Fix it at the source.
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, 'html.parser')
        # One <article class="product_pod"> per book on the page.
        for book in soup.find_all('article', class_='product_pod'):
            title = book.h3.a['title']
            price = book.find('div', class_='product_price').p.text
            price = price.replace('Â', '')  # defensive: strip mojibake if encoding slips
            link = base_url + book.h3.a['href']
            data.append((title, price, link))
        # Keep paging until the 'next' pagination button disappears.
        if soup.find('li', class_='next'):
            page_number += 1
        else:
            scraping = False
    # Explicit utf-8 so the '£' sign survives on every platform.
    with open('books.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(('title', 'price', 'link'))  # header row
        writer.writerows(data)
    message = f"Finished scraping {base_url} and found {len(data)} books. "
    print(rainbow_text(message) + print_link(os.path.abspath('books.csv')))
def rainbow_text(text):
    """Return *text* with ANSI colors cycling per character, rainbow-style.

    The original implementation picked a single random color for the whole
    string, which neither looked like a rainbow nor reset the terminal
    color afterwards. This version cycles the palette deterministically
    and appends ``Fore.RESET`` so later output is unaffected.
    """
    rainbow = [Fore.RED, Fore.YELLOW, Fore.GREEN, Fore.CYAN, Fore.BLUE, Fore.MAGENTA]
    colored = ''.join(rainbow[i % len(rainbow)] + ch for i, ch in enumerate(text))
    return colored + Fore.RESET
def print_link(file=None, line=None):
    """Return a PyCharm-clickable 'File "...", line N' link string.

    Args:
        file: path to show; defaults to the caller's source file.
        line: line number to show; defaults to the caller's line.
              Values below 1 are clamped to 1.

    Returns:
        The link string with backslashes normalized to forward slashes.
    """
    if file is None or line is None:
        # inspect.stack() is expensive (it resolves every frame), so walk
        # it once and reuse the caller's frame record for both defaults.
        caller = inspect.stack()[1]
        if file is None:
            file = caller.filename
        if line is None:
            line = caller.lineno
    return f'File "{file}", line {max(line, 1)}'.replace("\\", "/")
# Script entry point: run the scraper only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    scrap_books()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment