# andrewljohnson/parse_magiccon.py

## parse_magiccon.py
from bs4 import BeautifulSoup
import pandas as pd

# Column order of the exported spreadsheet.
COLUMNS = ['Title', 'Cost', 'Day', 'Time', 'Tournament Details', 'Is Full']

# Headings that end the free text kept in the 'Tournament Details' column.
DETAIL_MARKERS = ('TOURNAMENT DETAILS', 'REGISTRATION DETAILS')


def split_title_cost(title):
    """Return the text after the last '-' in *title*, stripped.

    An empty string is returned when the title contains no '-', so that the
    whole title is not reported as the cost.
    """
    _, dash, cost = title.rpartition('-')
    return cost.strip() if dash else ''


def split_schedule(schedule_info):
    """Split a '<day> from <time>' string into a ``(day, time)`` tuple.

    Only the first ' from ' acts as the separator. When it is absent the
    whole string becomes the day and the time is empty, instead of a
    ``ValueError`` being raised by a failed tuple unpacking.
    """
    day, _, time_range = schedule_info.partition(' from ')
    return day.strip(), time_range.strip()


def trim_details(text):
    """Return the part of *text* that precedes the first detail heading."""
    for marker in DETAIL_MARKERS:
        text = text.split(marker)[0]
    return text.strip()


def node_text(node):
    """Return the stripped text of a parsed element, or '' if it is None."""
    return node.text.strip() if node is not None else ''


def parse_item(item):
    """Build one spreadsheet row (a dict keyed by ``COLUMNS``) from *item*.

    *item* is one element matched by the 'registration_schedule_item' class.
    Pieces that are missing from the element yield empty cells rather than
    aborting the whole export.
    """
    title = node_text(item.find('h3'))
    day, time_range = split_schedule(node_text(item.find(class_='text-info')))

    # The details come from the second <p> of the item; an item with fewer
    # paragraphs gets an empty cell instead of raising IndexError.
    paragraphs = item.find_all('p')
    details = trim_details(node_text(paragraphs[1])) if len(paragraphs) > 1 else ''

    return {
        'Title': title,
        'Cost': split_title_cost(title),
        'Day': day,
        'Time': time_range,
        'Tournament Details': details,
        # find() yields an element or None; store a plain bool so the column
        # holds a value a spreadsheet can represent.
        'Is Full': item.find(class_='well text-center') is not None,
    }


def main():
    """Read 'magiccon.html' and write the schedule to 'tournament_schedule.xlsx'."""
    with open('magiccon.html', 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file.read(), 'html.parser')

    # One row per registration schedule item found in the page.
    schedule_items = soup.find_all(class_='registration_schedule_item')
    rows = [parse_item(item) for item in schedule_items]

    # Passing the columns explicitly keeps the header row even with no items.
    df = pd.DataFrame(rows, columns=COLUMNS)
    df.to_excel('tournament_schedule.xlsx', index=False)


if __name__ == '__main__':
    main()
	from bs4 import BeautifulSoup
	import pandas as pd

	# Column order of the exported spreadsheet.
	COLUMNS = ['Title', 'Cost', 'Day', 'Time', 'Tournament Details', 'Is Full']

	# Headings that end the free text kept in the 'Tournament Details' column.
	DETAIL_MARKERS = ('TOURNAMENT DETAILS', 'REGISTRATION DETAILS')


	def split_title_cost(title):
	    """Return the text after the last '-' in *title*, stripped.

	    An empty string is returned when the title contains no '-', so that the
	    whole title is not reported as the cost.
	    """
	    _, dash, cost = title.rpartition('-')
	    return cost.strip() if dash else ''


	def split_schedule(schedule_info):
	    """Split a '<day> from <time>' string into a ``(day, time)`` tuple.

	    Only the first ' from ' acts as the separator. When it is absent the
	    whole string becomes the day and the time is empty, instead of a
	    ``ValueError`` being raised by a failed tuple unpacking.
	    """
	    day, _, time_range = schedule_info.partition(' from ')
	    return day.strip(), time_range.strip()


	def trim_details(text):
	    """Return the part of *text* that precedes the first detail heading."""
	    for marker in DETAIL_MARKERS:
	        text = text.split(marker)[0]
	    return text.strip()


	def node_text(node):
	    """Return the stripped text of a parsed element, or '' if it is None."""
	    return node.text.strip() if node is not None else ''


	def parse_item(item):
	    """Build one spreadsheet row (a dict keyed by ``COLUMNS``) from *item*.

	    *item* is one element matched by the 'registration_schedule_item' class.
	    Pieces that are missing from the element yield empty cells rather than
	    aborting the whole export.
	    """
	    title = node_text(item.find('h3'))
	    day, time_range = split_schedule(node_text(item.find(class_='text-info')))

	    # The details come from the second <p> of the item; an item with fewer
	    # paragraphs gets an empty cell instead of raising IndexError.
	    paragraphs = item.find_all('p')
	    details = trim_details(node_text(paragraphs[1])) if len(paragraphs) > 1 else ''

	    return {
	        'Title': title,
	        'Cost': split_title_cost(title),
	        'Day': day,
	        'Time': time_range,
	        'Tournament Details': details,
	        # find() yields an element or None; store a plain bool so the column
	        # holds a value a spreadsheet can represent.
	        'Is Full': item.find(class_='well text-center') is not None,
	    }


	def main():
	    """Read 'magiccon.html' and write the schedule to 'tournament_schedule.xlsx'."""
	    with open('magiccon.html', 'r', encoding='utf-8') as file:
	        soup = BeautifulSoup(file.read(), 'html.parser')

	    # One row per registration schedule item found in the page.
	    schedule_items = soup.find_all(class_='registration_schedule_item')
	    rows = [parse_item(item) for item in schedule_items]

	    # Passing the columns explicitly keeps the header row even with no items.
	    df = pd.DataFrame(rows, columns=COLUMNS)
	    df.to_excel('tournament_schedule.xlsx', index=False)


	if __name__ == '__main__':
	    main()