MCMXCIII/Latest-Build.py

## Latest-Build.py
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import csv
import time
import os
import re

no_last_weeks = str(input("Number of last weeks: ")).strip()
file_name = str(input("Name of the output file to be saved: ")).strip()

try:
    os.remove(f"{file_name}.csv")
except:
    pass

def data_urls():
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    items = soup.find_all("input",{"title":"Click to open the record."})
    for item in items:
        url = "https://www.georgiapublicnotice.com/" + item["onclick"].replace("javascript:location.href='","").replace("';return false;","")
        urls.append(url)

try:
    url = "https://www.georgiapublicnotice.com/"
    driver = webdriver.Chrome()
    driver.get(url)

    time.sleep(2)
    search = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_txtSearch"]""")
    search.send_keys("SALE UNDER POWER")

    exact_phrase = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_rdoType"]/li[3]/label""")
    exact_phrase.click()
    time.sleep(5)

    plus = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_divDateRange"]/label/a""")
    plus.click()
    data_range = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_rbLastNumWeeks"]""")
    data_range.click()
    last_weeks = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_txtLastNumWeeks"]""")
    last_weeks.clear()
    last_weeks.send_keys(no_last_weeks)

    submit = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_btnGo"]""")
    submit.click()
    time.sleep(5)

    set_50 = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_WSExtendedGridNP1_GridView1_ctl01_ddlPerPage"]/option[7]""")
    set_50.click()
    time.sleep(5)

    total_pages = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_WSExtendedGridNP1_GridView1_ctl01_lblTotalPages"]""").text
    total_pages = int((total_pages.replace("of ","").replace(" Pages","")).strip())

    urls = []
    data_urls()
    for _ in range(total_pages-1):
        next_page = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_WSExtendedGridNP1_GridView1_ctl01_btnNext"]""")
        next_page.click()
        time.sleep(5)
        data_urls()

    with open(f'{file_name}.csv', 'a', newline='') as new_file:
        csv_writer = csv.writer(new_file)
        csv_writer.writerow(["Publication Name","Publication Url","Publication City","Publication State","Publication County","Notice Keywords","Notice Auth No","Notice Url","Notice Publish Date","Content Text","Address"])
    for url in urls:
        driver.get(url)
        time.sleep(3)

        soup_x = BeautifulSoup(driver.page_source, 'html.parser')
        publication_name = str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblPubName").text).strip()
        publication_url= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lnkPubURL").text).strip()
        publication_city= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblCity").text).strip()
        publication_state= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblState").text).strip()
        publication_county= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblCounty").text).strip()
        notice_keywords= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblKeywords").text).strip()
        notice_auth_no= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblNoticeAuthenticationNo").text).strip()
        notice_url = str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lnkNoticeURL").text).strip()
        notice_publish_date = str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_lblPublicationDAte").text).strip()
        content_text = str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_lblContentText").text).replace('\n', '')
        address = str(re.findall("is known as (\d+ [\w ,\.\-\/\#']+), ([\w \.\-']+), ([A-Z]{2}) ([-\dA-Z]+)",content_text)).text
        with open(f'{file_name}.csv', 'a', newline='',encoding="utf-8") as new_file:
            csv_writer = csv.writer(new_file)
            csv_writer.writerow([publication_name,publication_url,publication_city,publication_state,publication_county,notice_keywords,notice_auth_no,notice_url,notice_publish_date,content_text,address])
    driver.quit()
except:
    driver.quit()
    print("please try again...")
	from selenium import webdriver
	from selenium.webdriver.common.keys import Keys
	from bs4 import BeautifulSoup
	import csv
	import time
	import os
	import re

	no_last_weeks = str(input("Number of last weeks: ")).strip()
	file_name = str(input("Name of the output file to be saved: ")).strip()

	try:
	os.remove(f"{file_name}.csv")
	except:
	pass

	def data_urls():
	soup = BeautifulSoup(driver.page_source, 'html.parser')
	items = soup.find_all("input",{"title":"Click to open the record."})
	for item in items:
	url = "https://www.georgiapublicnotice.com/" + item["onclick"].replace("javascript:location.href='","").replace("';return false;","")
	urls.append(url)

	try:
	url = "https://www.georgiapublicnotice.com/"
	driver = webdriver.Chrome()
	driver.get(url)

	time.sleep(2)
	search = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_txtSearch"]""")
	search.send_keys("SALE UNDER POWER")

	exact_phrase = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_rdoType"]/li[3]/label""")
	exact_phrase.click()
	time.sleep(5)

	plus = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_divDateRange"]/label/a""")
	plus.click()
	data_range = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_rbLastNumWeeks"]""")
	data_range.click()
	last_weeks = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_txtLastNumWeeks"]""")
	last_weeks.clear()
	last_weeks.send_keys(no_last_weeks)

	submit = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_as1_btnGo"]""")
	submit.click()
	time.sleep(5)

	set_50 = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_WSExtendedGridNP1_GridView1_ctl01_ddlPerPage"]/option[7]""")
	set_50.click()
	time.sleep(5)

	total_pages = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_WSExtendedGridNP1_GridView1_ctl01_lblTotalPages"]""").text
	total_pages = int((total_pages.replace("of ","").replace(" Pages","")).strip())

	urls = []
	data_urls()
	for _ in range(total_pages-1):
	next_page = driver.find_element_by_xpath("""//*[@id="ctl00_ContentPlaceHolder1_WSExtendedGridNP1_GridView1_ctl01_btnNext"]""")
	next_page.click()
	time.sleep(5)
	data_urls()

	with open(f'{file_name}.csv', 'a', newline='') as new_file:
	csv_writer = csv.writer(new_file)
	csv_writer.writerow(["Publication Name","Publication Url","Publication City","Publication State","Publication County","Notice Keywords","Notice Auth No","Notice Url","Notice Publish Date","Content Text","Address"])
	for url in urls:
	driver.get(url)
	time.sleep(3)

	soup_x = BeautifulSoup(driver.page_source, 'html.parser')
	publication_name = str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblPubName").text).strip()
	publication_url= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lnkPubURL").text).strip()
	publication_city= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblCity").text).strip()
	publication_state= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblState").text).strip()
	publication_county= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblCounty").text).strip()
	notice_keywords= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblKeywords").text).strip()
	notice_auth_no= str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lblNoticeAuthenticationNo").text).strip()
	notice_url = str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_PublicNoticeDetails1_lnkNoticeURL").text).strip()
	notice_publish_date = str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_lblPublicationDAte").text).strip()
	content_text = str(soup_x.select_one("#ctl00_ContentPlaceHolder1_PublicNoticeDetailsBody1_lblContentText").text).replace('\n', '')
	address = str(re.findall("is known as (\d+ [\w ,\.\-\/\#']+), ([\w \.\-']+), ([A-Z]{2}) ([-\dA-Z]+)",content_text)).text
	with open(f'{file_name}.csv', 'a', newline='',encoding="utf-8") as new_file:
	csv_writer = csv.writer(new_file)
	csv_writer.writerow([publication_name,publication_url,publication_city,publication_state,publication_county,notice_keywords,notice_auth_no,notice_url,notice_publish_date,content_text,address])
	driver.quit()
	except:
	driver.quit()
	print("please try again...")