veltman/gist:8921688

## gistfile1.py
import requests
import re
from bs4 import BeautifulSoup

# Download the play-by-play report page.
# The timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops on an HTTP error instead of letting the rest of
# the script try to parse an error page.
response = requests.get("http://www.nhl.com/scores/htmlreports/20022003/PL020413.HTM", timeout=30)
response.raise_for_status()
contents = response.text

def NotEmpty(str):
	"""Return True if the text has at least one non-whitespace character.

	An empty string, or one made only of whitespace, gives False.

	The parameter keeps its original name (it shadows the builtin only inside
	this function) so that existing callers are unaffected.
	"""
	# Raw string: "\s" inside a plain literal is an invalid escape sequence
	# that newer Python versions warn about.
	return not re.match(r"^\s*$", str)

def SplitOnColumns(unsplit, positions):
	"""Cut a fixed-width line into trimmed cells.

	positions is a sequence of [first, last] index pairs, both inclusive.
	One whitespace-stripped substring is returned per pair, in the same order.
	"""
	return [unsplit[span[0]:span[1] + 1].strip() for span in positions]

# Parse the downloaded page; the fixed-width table is the text of its <pre> element.
# NOTE(review): no parser is named here, so bs4 chooses one from whatever is
# installed -- pin one if the output must be reproducible across machines.
soup = BeautifulSoup(contents)

table = soup.find("pre").text

# Get all nonempty rows. This is built as a real list (not a lazy filter
# object) because the rows are indexed and sliced further down.
rows = [row for row in table.split("\r\n") if NotEmpty(row)]

# For tracking col positions: inclusive [start, end] character indexes
columns = []

# Find the dashes row
for row in rows:
	# It is the dashes row if it's all spaces/dashes with at least one dash
	if re.match(r"^[\s-]+$", row) and "-" in row:
		# Every unbroken run of dashes underlines one column
		columns = [[run.start(), run.end() - 1] for run in re.finditer(r"-+", row)]
		break

# Get an array of headers (taken from the first nonempty row)
headers = SplitOnColumns(rows[0], columns)

# One dict per data row, mapping header -> cell value.
# The first two rows (headers and dashes) are skipped.
as_json = [dict(zip(headers, SplitOnColumns(row, columns))) for row in rows[2:]]

# Parenthesized form prints the same thing whether print is a statement or a function
print(as_json)
	import requests
	import re
	from bs4 import BeautifulSoup

	# Download the play-by-play report page.
	# The timeout keeps the script from hanging forever on an unresponsive server,
	# and raise_for_status() stops on an HTTP error instead of letting the rest of
	# the script try to parse an error page.
	response = requests.get("http://www.nhl.com/scores/htmlreports/20022003/PL020413.HTM", timeout=30)
	response.raise_for_status()
	contents = response.text

	def NotEmpty(str):
		"""Return True if the text has at least one non-whitespace character.

		An empty string, or one made only of whitespace, gives False.

		The parameter keeps its original name (it shadows the builtin only
		inside this function) so that existing callers are unaffected.
		"""
		# Raw string: "\s" inside a plain literal is an invalid escape sequence
		# that newer Python versions warn about.
		return not re.match(r"^\s*$", str)

	def SplitOnColumns(unsplit, positions):
		"""Cut a fixed-width line into trimmed cells.

		positions is a sequence of [first, last] index pairs, both inclusive.
		One whitespace-stripped substring is returned per pair, in the same order.
		"""
		return [unsplit[span[0]:span[1] + 1].strip() for span in positions]

	# Parse the downloaded page; the fixed-width table is the text of its <pre> element.
	# NOTE(review): no parser is named here, so bs4 chooses one from whatever is
	# installed -- pin one if the output must be reproducible across machines.
	soup = BeautifulSoup(contents)

	table = soup.find("pre").text

	# Get all nonempty rows. This is built as a real list (not a lazy filter
	# object) because the rows are indexed and sliced further down.
	rows = [row for row in table.split("\r\n") if NotEmpty(row)]

	# For tracking col positions: inclusive [start, end] character indexes
	columns = []

	# Find the dashes row
	for row in rows:
		# It is the dashes row if it's all spaces/dashes with at least one dash
		if re.match(r"^[\s-]+$", row) and "-" in row:
			# Every unbroken run of dashes underlines one column
			columns = [[run.start(), run.end() - 1] for run in re.finditer(r"-+", row)]
			break

	# Get an array of headers (taken from the first nonempty row)
	headers = SplitOnColumns(rows[0], columns)

	# One dict per data row, mapping header -> cell value.
	# The first two rows (headers and dashes) are skipped.
	as_json = [dict(zip(headers, SplitOnColumns(row, columns))) for row in rows[2:]]

	# Parenthesized form prints the same thing whether print is a statement or a function
	print(as_json)