Skip to content

Instantly share code, notes, and snippets.

@veltman
Created February 10, 2014 18:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save veltman/8921366 to your computer and use it in GitHub Desktop.
Save veltman/8921366 to your computer and use it in GitHub Desktop.
import requests
import re
from bs4 import BeautifulSoup
# Scrape a fixed-width text table out of the <pre> block of an NHL HTML
# report and print it as a list of dicts keyed by the table's header names.

REPORT_URL = "http://www.nhl.com/scores/htmlreports/20022003/PL020413.HTM"

# A row made up solely of whitespace and dashes.  Whitespace-only rows match
# this pattern too, so callers must additionally require at least one dash.
SEPARATOR_ROW = re.compile(r"^[\s-]+$")


def find_columns(rows):
    """Return the column spans described by the first dashed separator row.

    Each run of consecutive dashes in that row marks one column.  The result
    is a list of ``[start, end]`` pairs with *inclusive* character offsets,
    in left-to-right order.  Only the first separator row is consulted; an
    empty list is returned when no row qualifies.
    """
    for row in rows:
        if "-" in row and SEPARATOR_ROW.match(row):
            return [[run.start(), run.end() - 1]
                    for run in re.finditer(r"-+", row)]
    return []


def parse_fixed_width_table(text):
    """Convert the text of a fixed-width table into a list of dicts.

    The first line of ``text`` is discarded.  Of the remaining non-blank
    lines, the first is taken as the header, the second (expected to be the
    dashed separator) is skipped, and every later line becomes one dict that
    maps header cell -> row cell.  Cells are cut at the offsets found by
    ``find_columns`` and stripped of surrounding whitespace; a line shorter
    than a column simply yields ``""`` for that cell.

    Returns an empty list when ``text`` contains no separator row.
    """
    # Accept both "\r\n" and bare "\n" line endings.
    rows = re.split(r"\r?\n", text)[1:]

    columns = find_columns(rows)
    if not columns:
        return []

    # Skip empty and whitespace-only lines so they do not turn into records
    # whose every value is "".
    results = [
        [row[start:end + 1].strip() for start, end in columns]
        for row in rows
        if row.strip()
    ]

    # A separator row was found, so ``results`` holds at least one entry.
    header = results[0]
    return [dict(zip(header, result)) for result in results[2:]]


def main():
    """Download the report, parse its <pre> table and print the records.

    Raises ``requests.HTTPError`` for a non-2xx response and ``ValueError``
    when the page contains no ``<pre>`` element.
    """
    response = requests.get(REPORT_URL, timeout=30)
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    pre = BeautifulSoup(response.text, "html.parser").find("pre")
    if pre is None:
        raise ValueError("no <pre> block found in " + REPORT_URL)

    print(parse_fixed_width_table(pre.text))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment