Alin Preda aleenprd

## WorldPostCodeScraper.py
import pandas as pd
from urllib import request
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from typing import Union
from time import sleep


class WorldPostCodeScraper:
    """Scraper class for the website https://worldpostalcode.com/."""

## scrape_imdb_reviews_main.py
"""Python executable which scrapes IMDB for reviews."""


import argparse
import pandas as pd
from time import sleep
from tqdm import tqdm

from dependencies.general import timing
from dependencies.scrapers import ImdbReviewScraper

## ImdbReviewScraper.py
class ImdbReviewScraper(Scraper):
    """Implements methods for scraping IMDB.

    Inherited Attributes:
        chromedriver (chromedriver): a Chrome webdriver for Selenium.

    Own Methods:
        @staticmethod get_ratings_page
        @staticmethod get_reviews_page
        get_episodes_links

## ImdbReviewScraper.py
class ImdbReviewScraper(Scraper):
    """Implements methods for scraping IMDB.

    Inherited Attributes:
        chromedriver (chromedriver): a Chrome webdriver for Selenium.

    Own Methods:
        @staticmethod get_ratings_page
        @staticmethod get_reviews_page
        get_episodes_links

## scraper_base_class.py
class ScraperException(Exception):
    """Root of the scraper exception hierarchy.

    Catching this type also catches every more specific scraper exception
    that derives from it.
    """


class ImdbScraperException(ScraperException):
    """Starting point for IMDB scraper exceptions.

    Derives from ``ScraperException``, so handlers for the generic scraper
    error also catch this one.
    """


## imports_scraper_classes.py
# Data manipulation
import pandas as pd
import re as regex

# Scraping
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

## scrape_imdb_reviews_pages_main.py
@timing
def main(season_link: str, show_link: str, driver_service: Service, output_path: str) -> None:
    """Scrape an IMDB season's reviews for each episode, plus the show's general reviews.

    Args:
        season_link (str): URL pointing to the season page.
        show_link (str): URL pointing to the show's general reviews.
        driver_service (Service): service object for the Chrome web driver
            (presumably Selenium's Chrome ``Service`` -- confirm against the
            module imports).
        output_path (str): path, including the filename, where the CSV is saved.
    """

## scrape_reviews_page.py
def scrape_reviews_page(reviews_soup: BeautifulSoup) -> pd.DataFrame:
    """Scrape IMDB reviews page.

    Note: Extracts ratings, usernames, review date, titles, review body text,
    number of reactions, total reactions to review.

    Args:
        reviews_soup (BeautifulSoup): soup of the entirely loaded reviews page.

    Returns:

## fetch_el_if_available.py
from typing import Union

def fetch_el_if_available(soup: BeautifulSoup, element_type: str, class_type: str) -> Union[str, None]:
    """Returns element text if found, otherwise returns None.

    Args:
        soup (BeautifulSoup): a bs4 soup.
        element_type (str): HTML type e.g. 'div'.
        class_type (str): the class of the desired element.


## get_ratings_and_reviews_page.py
def get_ratings_page(episode_page: str, suffix: str="/ratings/?ref_=tt_ov_rt") -> str:
    """Build the ratings-page URL that corresponds to an episode page URL.

    Everything from the last "/" of ``episode_page`` onwards is discarded and
    ``suffix`` is appended in its place.

    Args:
        episode_page (str): URL of the episode page.
        suffix (str): text appended after the trimmed URL.

    Returns:
        str: the trimmed URL followed by ``suffix``. When ``episode_page``
        contains no "/", the result is just ``suffix``.
    """
    # rpartition yields an empty head when there is no "/", which matches
    # joining an empty list of path pieces.
    url_head, _, _ = episode_page.rpartition("/")
    return url_head + suffix

def get_reviews_page(episode_page: str, suffix: str="/reviews?ref_=tt_urv") -> str:
    """Build the reviews-page URL that corresponds to an episode page URL.

    Everything from the last "/" of ``episode_page`` onwards is discarded and
    ``suffix`` is appended in its place.

    Args:
        episode_page (str): URL of the episode page.
        suffix (str): text appended after the trimmed URL.

    Returns:
        str: the trimmed URL followed by ``suffix``. When ``episode_page``
        contains no "/", the result is just ``suffix``.
    """
    # rpartition yields an empty head when there is no "/", which matches
    # joining an empty list of path pieces.
    url_head, _, _ = episode_page.rpartition("/")
    return url_head + suffix
	import pandas as pd
	from urllib import request
	from bs4 import BeautifulSoup
	from fake_useragent import UserAgent
	from typing import Union
	from time import sleep


	class WorldPostCodeScraper:
		"""Scraper class for https://worldpostalcode.com/."""
	"""Python executable which scrapes IMDB for reviews."""


	import argparse
	import pandas as pd
	from time import sleep
	from tqdm import tqdm

	from dependencies.general import timing
	from dependencies.scrapers import ImdbReviewScraper
	class ImdbReviewScraper(Scraper):
	"""Implements methods for scraping IMDB.

	Inherited Attributes:
	chromedriver (chromedriver): a Chrome webdriver for Selenium.

	Own Methods:
	@staticmethod get_ratings_page
	@staticmethod get_reviews_page
	get_episodes_links
	class ScraperException(Exception):
		"""Starting point for Scraper exceptions.

		Every more specific scraper exception is meant to derive from this
		class so callers can catch the whole family with one handler.
		"""
		pass


	class ImdbScraperException(ScraperException):
		"""Starting point for IMDB scraper exceptions.

		Derives from ``ScraperException``, so handlers for the generic scraper
		error also catch this one.
		"""
		pass
	# Data manipulation
	import pandas as pd
	import re as regex

	# Scraping
	from bs4 import BeautifulSoup
	from selenium import webdriver
	from selenium.webdriver.common.by import By
	from selenium.webdriver.chrome.service import Service
	from webdriver_manager.chrome import ChromeDriverManager
	@timing
	def main(season_link: str, show_link: str, driver_service: Service, output_path: str) -> None:
		"""Main function to scrape an IMDB season's reviews for each episode and also the general reviews.

		Args:
			season_link (str): URL pointing to season page.
			show_link (str): URL pointing to show general reviews.
			driver_service (Service): service object for the Chrome web driver.
			output_path (str): path including filename where we want to save the CSV.
		"""
	def scrape_reviews_page(reviews_soup: BeautifulSoup) -> pd.DataFrame:
	"""Scrape IMDB reviews page.

	Note: Extracts ratings, usernames, review date, titles, review body text,
	number of reactions, total reactions to review.

	Args:
	reviews_soup (BeautifulSoup): soup of the entirely loaded reviews page.

	Returns:
	from typing import Union

	def fetch_el_if_available(soup: BeautifulSoup, element_type: str, class_type: str) -> Union[str, None]:
	"""Returns element text if found, otherwise returns None.

	Args:
	soup (BeautifulSoup): a bs4 soup.
	element_type (str): HTML type e.g. 'div'.
	class_type (str): the class of the desired element.
	def get_ratings_page(episode_page: str, suffix: str="/ratings/?ref_=tt_ov_rt") -> str:
		"""Build the ratings-page URL that corresponds to an episode page URL.

		Everything from the last "/" of ``episode_page`` onwards is dropped and
		``suffix`` is appended in its place.

		Args:
			episode_page (str): URL of the episode page.
			suffix (str): text appended after the trimmed URL.

		Returns:
			str: the trimmed URL followed by ``suffix``. When ``episode_page``
			contains no "/", the result is just ``suffix``.
		"""
		return "/".join(episode_page.split("/")[:-1]) + suffix

	def get_reviews_page(episode_page: str, suffix: str="/reviews?ref_=tt_urv") -> str:
		"""Build the reviews-page URL that corresponds to an episode page URL.

		Everything from the last "/" of ``episode_page`` onwards is dropped and
		``suffix`` is appended in its place.

		Args:
			episode_page (str): URL of the episode page.
			suffix (str): text appended after the trimmed URL.

		Returns:
			str: the trimmed URL followed by ``suffix``. When ``episode_page``
			contains no "/", the result is just ``suffix``.
		"""
		return "/".join(episode_page.split("/")[:-1]) + suffix