# Selenium WebDriver used for infinite scrolling
# Beautiful Soup used for scraping content
# Beautiful Soup Official Documentation - https://www.crummy.com/software/BeautifulSoup/bs4/doc
import sys
import os
import time

from selenium import webdriver
from selenium.webdriver import ChromeOptions
from bs4 import BeautifulSoup

# Import the locators and helpers modules
sys.path.append(sys.path[0] + "/../..")
from pageobject.locators import locators
from pageobject.helpers import helpers

exec_platform = os.getenv('EXEC_PLATFORM')

def scrap_infinite_website(url) -> list:
    meta_data_arr = []

    if exec_platform == 'cloud':
        username = os.getenv('LT_USERNAME', None)
        access_key = os.getenv('LT_ACCESS_KEY', None)
        grid_url = "https://{}:{}@hub.lambdatest.com/wd/hub".format(username, access_key)

        ch_options = webdriver.ChromeOptions()
        ch_options.browser_version = "latest"
        ch_options.platform_name = "Windows 11"

        lt_options = {}
        lt_options["build"] = "Build: Web Scraping with Selenium & Beautiful Soup"
        lt_options["project"] = "Project: Web Scraping with Selenium & Beautiful Soup"
        lt_options["name"] = "Test: Web Scraping with Selenium & Beautiful Soup"
        lt_options["browserName"] = "Chrome"
        lt_options["browserVersion"] = "latest"
        lt_options["platformName"] = "Windows 11"
        lt_options["console"] = "error"
        lt_options["w3c"] = True
        lt_options["headless"] = True
        ch_options.set_capability('LT:Options', lt_options)

        driver = webdriver.Remote(
            command_executor=grid_url,
            options=ch_options
        )
    elif exec_platform == 'local':
        options = ChromeOptions()
        options.add_argument("--headless=new")
        driver = webdriver.Chrome(options=options)
    else:
        raise ValueError("Set the EXEC_PLATFORM environment variable to 'cloud' or 'local'")

    driver.get(url)

    # Scroll until the page height stops growing
    # (approach adapted from https://stackoverflow.com/a/41181494/126105)
    start_height = driver.execute_script("return document.documentElement.scrollHeight")

    while True:
        # Scroll to the bottom of the page
        driver.execute_script("window.scrollTo(0, " + str(start_height) + ")")

        # Wait for the new content to load
        time.sleep(2)

        scroll_height = driver.execute_script("return document.documentElement.scrollHeight")
        if scroll_height == start_height:
            # The height has not changed, so we have reached the end of the page
            break
        start_height = scroll_height

    # Pass features='html.parser' explicitly; otherwise Beautiful Soup emits a
    # warning asking for the parser to be named in the constructor.
    soup = BeautifulSoup(driver.page_source, features='html.parser')

    # The Chrome instance is no longer required
    # since we have already read the page source
    driver.quit()

    # Locators last verified on 28/07/2023.
    # If elements are not located, please update the locators accordingly.
    rows = soup.find_all('div', class_='w-full rounded border post')
    print("\nTotal items on " + url + " are " + str(len(rows)) + "\n")

    for row in rows:
        dress = row.find('h4')
        link = dress.find('a')
        price = row.find('h5')

        # Create a dictionary of the meta-data of the items on the e-commerce store.
        # Note: get_attribute_list('href') returns a list of href values.
        meta_data_dict = {
            'dress': dress.text.strip(),
            'link': link.get_attribute_list('href'),
            'price': price.text.strip()
        }
        meta_data_arr.append(meta_data_dict)

    return meta_data_arr

if __name__ == '__main__':
    meta_data_arr = scrap_infinite_website(locators.test_bs4_infinite_url)
    helpers.print_scrapped_content(meta_data_arr)
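
# Usage note (a minimal sketch; the file name below is an assumption for
# illustration, not part of the original project):
#
#   # Run against a local headless Chrome:
#   #   EXEC_PLATFORM=local python scrape_infinite.py
#
#   # Run on the LambdaTest cloud grid; credentials are read from the
#   # LT_USERNAME and LT_ACCESS_KEY environment variables:
#   #   EXEC_PLATFORM=cloud LT_USERNAME=<user> LT_ACCESS_KEY=<key> python scrape_infinite.py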