Subastas scraper
@dbigman, created March 25, 2023
#! python3
import logging
import os

import dateparser
import pandas as pd  # writing .xlsx files also requires openpyxl
import requests
from bs4 import BeautifulSoup

# Basic logging setup: timestamp, level, and message on every line.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
# logging.disable(logging.CRITICAL)  # uncomment to silence logging at all levels
logging.debug('Start of program')
def fetch_content(url):
    response = requests.get(url, timeout=30)  # avoid hanging forever on a slow server
    response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
    return response.content
def parse_content(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')

    # Titles of the listed auctions.
    titles = soup.find_all('span', class_='title')

    # Name of the agency.
    agencia_element = soup.find('div', class_='agency')
    agencia = agencia_element.text.replace('Agencia: ', '').strip()

    # Location.
    localizacion_element = soup.find('div', class_='localization')
    localizacion = localizacion_element.text.replace('Localización: ', '').strip()

    # Status. class_='status' is assumed here; looking up class_='agency' again
    # would just return the agency element a second time.
    estatus_element = soup.find('div', class_='status')
    estatus = estatus_element.text.replace('Estatus: ', '').strip()
    # Dates.
    fechaApertura_elements = soup.find_all('div', class_='fechaApertura')
    fechaPresubasta_elements = soup.find_all('div', class_='fechaPreSubasta')
    fechaPliegos_elements = soup.find_all('div', class_='fechaPliegos')

    # Extract the date strings, dropping the 'Fecha apertura:' and other label prefixes.
    fechaApertura_strings = [element.text.strip().replace('Fecha apertura:', '').strip() for element in fechaApertura_elements]
    fechaPresubasta_strings = [element.text.strip().replace('Fecha pre-subasta:', '').strip() for element in fechaPresubasta_elements]
    fechaPliegos_strings = [element.text.strip().replace('Fecha pliegos:', '').strip() for element in fechaPliegos_elements]

    # Convert the date strings to datetime.date objects; dateparser handles the
    # Spanish-language dates, and unparseable strings are silently dropped.
    fechaApertura = [parsed.date() for parsed in (dateparser.parse(s) for s in fechaApertura_strings) if parsed]
    fechaPresubasta = [parsed.date() for parsed in (dateparser.parse(s) for s in fechaPresubasta_strings) if parsed]
    fechaPliegos = [parsed.date() for parsed in (dateparser.parse(s) for s in fechaPliegos_strings) if parsed]
    # Collect the scraped items. Agency, location, and status are single
    # page-level strings, so the same value is repeated on every row
    # (indexing into them with [i] would yield individual characters).
    items = []
    for i in range(len(titles)):
        item = {
            'title': titles[i].text.strip(),
            'fechaPliegos': fechaPliegos[i] if i < len(fechaPliegos) else None,
            'fechaApertura': fechaApertura[i] if i < len(fechaApertura) else None,
            'fechaPresubasta': fechaPresubasta[i] if i < len(fechaPresubasta) else None,
            'Localizacion': localizacion,
            'Estatus': estatus,
            'Agencia': agencia
        }
        items.append(item)
    return items
def save_to_excel(data, file_name):
    df = pd.DataFrame(data)
    df.to_excel(file_name, index=False)
def main():
    url = 'https://subastas.pr.gov/Pages/subastas.aspx'  # target URL
    content = fetch_content(url)
    items = parse_content(content)
    excel_file_name = 'Subastas_scrape.xlsx'
    target_directory = 'C:\\'  # a raw string cannot end in a backslash, so escape it instead
    os.chdir(target_directory)
    current_directory = os.getcwd()
    logging.debug('Current working directory: %s', current_directory)
    save_to_excel(items, excel_file_name)
    logging.debug('File saved: %s', excel_file_name)
if __name__ == '__main__':
    main()
    logging.debug('End of program')
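
A quick sanity check for the date-parsing step, as a minimal standalone sketch (the sample date string below is hypothetical, in the same Spanish format the site uses):

import dateparser

# dateparser recognizes Spanish month names without an explicit format string.
parsed = dateparser.parse('25 de marzo de 2023')
print(parsed.date())  # -> 2023-03-25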