# refabr1k/ciscat_html_to_excel.py

## ciscat_html_to_excel.py
from bs4 import BeautifulSoup
import csv

def extract_data_from_html(html_path):
    """Extract one record per rule title from an HTML report.

    The file is parsed with BeautifulSoup's ``html.parser``. Every element
    with class ``ruleTitle`` produces one record; the elements with class
    ``description``, ``rationale`` and ``fixtext`` are matched to it by
    their position in the document. The text of a ``fixtext`` element is
    split at the first ``Impact:`` label: what precedes the label becomes
    ``FixText`` and what follows it becomes ``Impact``.

    Args:
        html_path: Path of the HTML file to read (decoded as UTF-8).

    Returns:
        A list of dicts with the keys ``Title``, ``Description``,
        ``Rationale``, ``FixText`` and ``Impact``. A field is an empty
        string when no element exists at the rule's position, and
        ``Impact`` is empty when the fix text has no ``Impact:`` label.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(html_path, 'r', encoding='utf-8') as file:
        html_content = file.read()

    soup = BeautifulSoup(html_content, 'html.parser')

    # Collect each kind of element in document order.
    # NOTE(review): records are assembled purely by position, which assumes
    # every rule carries exactly one description, rationale and fixtext
    # element. A rule missing one of them would shift the fields of all the
    # following rules -- TODO confirm against a real report.
    rule_titles = soup.find_all(class_='ruleTitle')
    descriptions = soup.find_all(class_='description')
    rationales = soup.find_all(class_='rationale')
    fixtexts = soup.find_all(class_='fixtext')

    def _text_at(elements, index):
        """Return the flattened text of ``elements[index]`` or '' if absent."""
        if index < len(elements):
            return elements[index].get_text(separator=' ', strip=True)
        return ''

    extracted_data = []
    for i, title in enumerate(rule_titles):
        # Split on the FIRST "Impact:" label only. The previous
        # ``.replace('Impact:', '')`` also erased any later occurrence of
        # the label from inside the impact text itself.
        fix_part, _, impact_part = _text_at(fixtexts, i).partition('Impact:')

        extracted_data.append({
            'Title': title.get_text(strip=True),
            'Description': _text_at(descriptions, i),
            'Rationale': _text_at(rationales, i),
            'FixText': fix_part.strip(),
            'Impact': impact_part.strip(),
        })

    return extracted_data

# Example usage: convert one HTML report into a CSV file.
html_path = 'findings.html'  # Replace with your HTML file path
csv_file_path = 'extracted_data.csv'  # Path for the output CSV file

data_to_write = extract_data_from_html(html_path)

# Dump the records to CSV: a header row first, then one row per record.
# newline='' leaves line-ending handling to the csv module.
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(
        csvfile,
        fieldnames=['Title', 'Description', 'Rationale', 'FixText', 'Impact'],
    )
    writer.writeheader()
    writer.writerows(data_to_write)

print(f"Data has been extracted and written to {csv_file_path}")
	from bs4 import BeautifulSoup
	import csv

	def extract_data_from_html(html_path):
	    """Extract one record per rule title from an HTML report.

	    The file is parsed with BeautifulSoup's ``html.parser``. Every element
	    with class ``ruleTitle`` produces one record; the elements with class
	    ``description``, ``rationale`` and ``fixtext`` are matched to it by
	    their position in the document. The text of a ``fixtext`` element is
	    split at the first ``Impact:`` label: what precedes the label becomes
	    ``FixText`` and what follows it becomes ``Impact``.

	    Args:
	        html_path: Path of the HTML file to read (decoded as UTF-8).

	    Returns:
	        A list of dicts with the keys ``Title``, ``Description``,
	        ``Rationale``, ``FixText`` and ``Impact``. A field is an empty
	        string when no element exists at the rule's position, and
	        ``Impact`` is empty when the fix text has no ``Impact:`` label.

	    Raises:
	        OSError: If the file cannot be opened.
	        UnicodeDecodeError: If the file is not valid UTF-8.
	    """
	    with open(html_path, 'r', encoding='utf-8') as file:
	        html_content = file.read()

	    soup = BeautifulSoup(html_content, 'html.parser')

	    # Collect each kind of element in document order.
	    # NOTE(review): records are assembled purely by position, which assumes
	    # every rule carries exactly one description, rationale and fixtext
	    # element. A rule missing one of them would shift the fields of all the
	    # following rules -- TODO confirm against a real report.
	    rule_titles = soup.find_all(class_='ruleTitle')
	    descriptions = soup.find_all(class_='description')
	    rationales = soup.find_all(class_='rationale')
	    fixtexts = soup.find_all(class_='fixtext')

	    def _text_at(elements, index):
	        """Return the flattened text of ``elements[index]`` or '' if absent."""
	        if index < len(elements):
	            return elements[index].get_text(separator=' ', strip=True)
	        return ''

	    extracted_data = []
	    for i, title in enumerate(rule_titles):
	        # Split on the FIRST "Impact:" label only; a blanket
	        # ``.replace('Impact:', '')`` would also erase any later occurrence
	        # of the label from inside the impact text itself.
	        fix_part, _, impact_part = _text_at(fixtexts, i).partition('Impact:')

	        extracted_data.append({
	            'Title': title.get_text(strip=True),
	            'Description': _text_at(descriptions, i),
	            'Rationale': _text_at(rationales, i),
	            'FixText': fix_part.strip(),
	            'Impact': impact_part.strip(),
	        })

	    return extracted_data

	# Example usage: convert one HTML report into a CSV file.
	html_path = 'findings.html'  # Replace with your HTML file path
	csv_file_path = 'extracted_data.csv'  # Path for the output CSV file

	data_to_write = extract_data_from_html(html_path)

	# Write the extracted data to a CSV file: header row, then one row per
	# record. newline='' leaves line-ending handling to the csv module.
	with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
	    writer = csv.DictWriter(
	        csvfile,
	        fieldnames=['Title', 'Description', 'Rationale', 'FixText', 'Impact'],
	    )
	    writer.writeheader()
	    writer.writerows(data_to_write)

	print(f"Data has been extracted and written to {csv_file_path}")