Created
September 14, 2023 08:41
-
-
Save andrewljohnson/5e3441484ca701348d6a1250908be0f9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the saved MagicCon registration page ('magiccon.html') and export
# one spreadsheet row per schedule item to 'tournament_schedule.xlsx'.

# Output columns, in the order they appear in the spreadsheet.
COLUMNS = ['Title', 'Cost', 'Day', 'Time', 'Tournament Details', 'Is Full']


def extract_cost(title: str) -> str:
    """Return the text after the last '-' in *title*, stripped.

    Returns an empty string when the title contains no '-', so that the
    'Cost' column is left blank instead of repeating the whole title.
    """
    _, separator, cost = title.rpartition('-')
    return cost.strip() if separator else ''


def split_schedule(schedule_info: str) -> tuple:
    """Split '<day> from <time>' text into a ``(day, time)`` pair.

    The split happens at the first ' from '. When the separator is absent
    the whole text is returned as the day and the time is ''; this avoids
    the unpacking ValueError a plain ``split`` would raise.
    """
    day, _, time_range = schedule_info.partition(' from ')
    return day.strip(), time_range.strip()


def extract_tournament_details(registration_details: str) -> str:
    """Return the text preceding the first section heading.

    Everything from 'TOURNAMENT DETAILS' or 'REGISTRATION DETAILS'
    (whichever comes first) onwards is dropped.
    """
    details = registration_details.split('TOURNAMENT DETAILS')[0]
    return details.split('REGISTRATION DETAILS')[0]


def _text(tag) -> str:
    """Return the stripped text of *tag*, or '' when the tag was not found."""
    return tag.text.strip() if tag is not None else ''


def parse_item(item) -> dict:
    """Build one spreadsheet row (keyed by COLUMNS) from a schedule item.

    *item* is an element returned by ``soup.find_all``. Missing sub-elements
    produce empty cells rather than aborting the whole export.
    """
    title = _text(item.find('h3'))
    day, time_range = split_schedule(_text(item.find(class_='text-info')))

    # The second <p> of the item holds the details text.
    paragraphs = item.find_all('p')
    details_text = _text(paragraphs[1]) if len(paragraphs) > 1 else ''

    # Store a plain bool: find() returns a Tag or None, and a Tag object is
    # not a value that can be written into a spreadsheet cell.
    # NOTE(review): assumes a 'well text-center' element marks a full event,
    # as the original script did; confirm against the page markup.
    is_full = item.find(class_='well text-center') is not None

    return {
        'Title': title,
        'Cost': extract_cost(title),
        'Day': day,
        'Time': time_range,
        'Tournament Details': extract_tournament_details(details_text),
        'Is Full': is_full,
    }


def main():
    """Read 'magiccon.html' and write 'tournament_schedule.xlsx'."""
    with open('magiccon.html', 'r', encoding='utf-8') as file:
        html_data = file.read()

    soup = BeautifulSoup(html_data, 'html.parser')

    # Find all the registration schedule items.
    schedule_items = soup.find_all(class_='registration_schedule_item')
    rows = [parse_item(item) for item in schedule_items]

    # Passing columns= keeps the header row even when no items were found.
    df = pd.DataFrame(rows, columns=COLUMNS)

    # Save to an Excel file.
    df.to_excel('tournament_schedule.xlsx', index=False)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment