Last active
April 30, 2024 06:48
-
-
Save xshapira/7d171e43f8708ce754d4e6de8bbef394 to your computer and use it in GitHub Desktop.
Selenium-lotto-results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script will: | |
- open a Firefox browser | |
- navigate to the form page | |
- fill out the form and submit it | |
- navigate to the results page | |
- scrape the data from the page and close the browser | |
""" | |
import json | |
import logging | |
from contextlib import contextmanager | |
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
# Configure the root logger once at import time: INFO and above, with a
# "day-month-year time - [LEVEL]: message" layout.
logging.basicConfig(
    datefmt="%d-%b-%y %H:%M:%S",
    format="%(asctime)s - [%(levelname)s]: %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the rest of the script.
log = logging.getLogger(__name__)
@contextmanager
def browser():
    """
    Start a Firefox browser and make sure it is shut down afterwards.

    The driver is released in a ``finally`` clause, so an exception raised
    inside the ``with`` body (for example when an element cannot be found)
    no longer leaves the browser running.

    :return: A context manager that yields the started Firefox driver
    """
    driver = webdriver.Firefox()
    try:
        yield driver
    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window.
        driver.quit()
def fill_out_form(driver: webdriver.Firefox, number: int) -> None:
    """
    Open the archive page and submit the search form for one number.

    Loads the archive page, clicks the "searchByNumber" element, types the
    same number into both the "fromNumber" and "toNumber" fields and then
    clicks the submit link.

    :param driver: Browser driver used to load and interact with the page
    :param number: Value typed into both the from and to fields
    """
    driver.get("https://pais.co.il/Lotto/archive.aspx")

    # Click the search-by-number control before looking up the fields
    driver.find_element(By.ID, "searchByNumber").click()

    # Look up every control first, then interact with them
    range_fields = [
        driver.find_element(By.ID, field_id)
        for field_id in ("fromNumber", "toNumber")
    ]
    submit_link = driver.find_element(
        By.XPATH, "//a[@class='archive_form_button num w-inline-block']"
    )

    # The same number goes into both ends of the range
    for field in range_fields:
        field.send_keys(number)

    submit_link.click()
def scrape_data(driver: webdriver.Firefox, number: int) -> str:
    """
    Submit the search form for a number and return its results as JSON.

    The form is filled out first, then the results page for the same
    number is loaded directly and two elements are read from it. Their
    text is placed in a dictionary with two keys: lotto_results - the text
    of the numbers block with line breaks replaced by ", "; and
    strong_num - the text of the strong number element with line breaks
    replaced by spaces. Both values are strings.

    :param driver: Browser driver used to load and read the pages
    :param number: Number submitted in the form and used as both
        fromNumber and toNumber in the results URL
    :return: A json string of the data retrieved from the web page
    """
    fill_out_form(driver, number)

    # Load the results page for exactly this number
    results_url = f"https://pais.co.il/lotto/showMoreResults.aspx?fromIndex=0&amount=1&fromNumber={number}&toNumber={number}"
    driver.get(results_url)

    # Locate both elements before reading any text from them
    numbers_block = driver.find_element(
        By.XPATH, "//div[@class='archive_list_block numbers']"
    )
    strong_block = driver.find_element(
        By.XPATH, "//div[@class='loto_info_num strong archive']"
    )

    results_text = numbers_block.text.strip().replace("\n", ", ")
    strong_text = strong_block.text.strip().replace("\n", " ")

    return json.dumps(
        {
            "lotto_results": results_text,
            "strong_num": strong_text,
        }
    )
def main():
    """
    Open a browser, scrape the results for one number and log them.

    scrape_data() fills out and submits the form itself, so the form is not
    submitted separately here; doing both loaded the archive page and
    submitted the same search twice.
    """
    number = 3540
    with browser() as driver:
        # Scrape the data (this also fills out and submits the form)
        data_json = scrape_data(driver, number)
        # Parse the JSON data so a dictionary is logged
        data = json.loads(data_json)
        log.info(data)
    # The browser is automatically closed here


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment