Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Scrape ballroom dance competition results from the HTML result pages generated by Skating System Software.
from bs4 import BeautifulSoup
import urllib2
import re
import sys
import codecs
def process_folder(folder_url):
    """Print the results of one event as CSV lines on stdout.

    Fetches and parses the page at ``folder_url``, prints the event name
    as a quoted first field followed by two empty fields, then prints one
    ``ranking,bib number,name`` line per result row.

    Args:
        folder_url: URL of the result page for a single event.
    """
    page = urllib2.urlopen(folder_url)
    try:
        # BeautifulSoup consumes the response while constructing the tree,
        # so the connection can be released right afterwards.
        soup = BeautifulSoup(page, "html.parser")
    finally:
        page.close()

    # Print event (folder) name
    print('"' + soup.find("span", class_="competition").get_text().strip() + '",,')

    # NOTE(review): the pattern is searched within each class name, so it
    # already matches every class containing "row"; "row0dd" (digit zero)
    # may have been meant as "rowOdd" -- confirm against the real markup.
    rows = soup.find_all("tr", class_=re.compile("(row|row0dd)"))
    for row in rows:
        cells = (
            row.find("td", class_="center"),
            row.find("td", class_="centerHeader"),
            row.find("td", class_="left"),
        )
        # A matched row that lacks any of the three cells is not a result
        # line; skip it rather than crash on None.get_text().
        if any(cell is None for cell in cells):
            continue
        # Print ranking, competitor bib number, competitor name
        print('%s,%s,%s' % tuple(cell.get_text() for cell in cells))
def main():
    """Walk the competition index page and print every event's results.

    Fetches ``main_page``, collects the ``<a class="nav1">`` links, and for
    each link whose ``href`` does not contain "folder" calls
    ``process_folder`` on the matching ``.res.html`` page.

    Raises:
        ValueError: if ``main_page`` has not been filled in.
    """
    # TODO: Set your URL path here (excluding "index.html").
    # The link paths are appended to this string as-is, so it presumably
    # needs to end with "/" -- verify against the site's hrefs.
    main_page = ""
    if not main_page:
        # urlopen("") would fail with an opaque "unknown url type" error;
        # raise the same exception type with an actionable message.
        raise ValueError("main_page is empty: set it to the competition URL path")

    page = urllib2.urlopen(main_page)
    try:
        soup = BeautifulSoup(page, "html.parser")
    finally:
        page.close()

    for folder in soup.find_all("a", class_="nav1"):
        path = folder.get("href")
        if path is None:
            # Anchor without an href: nothing to follow.
            continue
        if "folder" not in path:
            process_folder(main_page + path.replace(".html", ".res.html"))
if __name__ == "__main__":
    # Fix for `UnicodeEncodeError` piped output issue: wrap both standard
    # streams in UTF-8 writers before anything is printed.
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)
    sys.stderr = codecs.getwriter('utf8')(sys.stderr)
    # Run the scraper; without this call the script sets up the streams
    # and exits without doing any work.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment