Skip to content

Instantly share code, notes, and snippets.

@andrewljohnson
Created September 14, 2023 08:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewljohnson/5e3441484ca701348d6a1250908be0f9 to your computer and use it in GitHub Desktop.
Save andrewljohnson/5e3441484ca701348d6a1250908be0f9 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import pandas as pd
# Read the saved schedule page up front; the file handle is released as soon
# as the `with` block exits.
with open('magiccon.html', 'r', encoding='utf-8') as file:
    html_data = file.read()

soup = BeautifulSoup(html_data, 'html.parser')


def _stripped_text(node):
    """Return the whitespace-stripped text of *node*, or '' when it is None."""
    return node.text.strip() if node is not None else ''


# Column name -> list of cell values. Every list receives exactly one entry
# per schedule item, so all columns stay the same length.
data = {
    'Title': [],
    'Cost': [],
    'Day': [],
    'Time': [],
    'Tournament Details': [],
    'Is Full': [],
}

# Find all the registration schedule items
schedule_items = soup.find_all(class_='registration_schedule_item')

# Loop through each schedule item
for item in schedule_items:
    # The cost is whatever follows the last '-' in the heading; a heading
    # without any '-' yields the whole title as its cost.
    title = _stripped_text(item.find('h3'))
    cost = title.split('-')[-1].strip()

    # partition() instead of unpacking split(): an item whose schedule text
    # lacks ' from ' (or repeats it) must not abort the whole export. With
    # no separator the full text lands in `day` and the time is left empty.
    schedule_info = _stripped_text(item.find(class_='text-info'))
    day, _, time_range = schedule_info.partition(' from ')

    # The details live in the item's second <p>; fall back to '' when the
    # item has fewer than two paragraphs.
    paragraphs = item.find_all('p')
    registration_details = (
        _stripped_text(paragraphs[1]) if len(paragraphs) > 1 else ''
    )
    # Keep only the text that precedes either section marker.
    # NOTE(review): this keeps what comes *before* the markers - confirm
    # against the page layout that this is the intended portion.
    tournament_details = registration_details.split('TOURNAMENT DETAILS')[0]
    tournament_details = tournament_details.split('REGISTRATION DETAILS')[0]

    # Store a plain bool: find() returns a Tag or None, and a Tag is not a
    # value the Excel writer can serialise.
    is_full = item.find(class_='well text-center') is not None

    # Append data to the lists
    data['Title'].append(title)
    data['Cost'].append(cost)
    data['Day'].append(day)
    data['Time'].append(time_range)
    data['Tournament Details'].append(tournament_details)
    data['Is Full'].append(is_full)

# Create a DataFrame from the data
df = pd.DataFrame(data)

# Save to an Excel file
df.to_excel('tournament_schedule.xlsx', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment