Skip to content

Instantly share code, notes, and snippets.

-- Gist preview fragment (SQL, truncated): CTE "rs" that appears to aggregate
-- baseball play-by-play events per game / inning / half-inning
-- (game_pk + event_inning + top_inning_sw look like the grouping keys).
-- NOTE(review): the preview cuts off mid-SELECT -- no FROM clause, GROUP BY,
-- or closing parenthesis is visible. Do not run as-is.
with
rs as (
select
game_pk
, event_inning
, top_inning_sw
-- Running max of the batting team's score = score at the end of the half-inning.
, max(bat_score) as bat_score_end_of_inning
-- Total outs recorded across the half-inning's events.
, sum(event_outs) as event_outs
-- Gist preview fragment (SQL, truncated after the CTE): locate the position
-- of the 6th comma inside comma-delimited 'play' records (presumably
-- Retrosheet-style event lines -- verify against the data source).
-- NOTE(review): instr() with 4 arguments (string, substring, start,
-- occurrence) is Oracle-style SQL; SQLite's instr() takes only 2 arguments.
-- The main query that consumes this CTE is not visible in the preview.
WITH
rs as (
select
-- Character position of the 6th comma in the record.
instr(play_string,',',1,6) as comma_position
, play_string
from mytable
-- Keep only records whose first 4 characters are the literal 'play'.
where substr(play_string,1,4)='play'
)
@tangotiger
tangotiger / parseSchedule.py
Last active June 20, 2017 23:49
NHL Schedule: JSON to csv/html
import json
# Gist preview (truncated): convert the NHL season-schedule JSON feed into
# csv and html output files. Only the setup lines are visible here.
print("Parse start")
# Original feed this local copy was downloaded from:
# http://live.nhl.com/GameData/SeasonSchedule-20152016.json
sourcefile = "C:/Users/TOM/DataNHL/original/SeasonSchedule.json"
targetfile = "C:/Users/TOM/DataNHL/final/parsed_SeasonSchedule.csv"
htmltargetfile = "C:/Users/TOM/DataNHL/final/parsed_SeasonSchedule.htm"
# NOTE(review): the preview is cut mid-statement -- the backslash continues
# this assignment onto a line that is not shown.
header_row='NHL_GAME_ID' \
@tangotiger
tangotiger / snippet_csv_to_sqlite.py
Created February 21, 2016 16:11
snippet Loading CSV into SQLite
# this takes a standard csv file and then loads it into a database
# question: what if instead of a csv file, I had an equivalent <table><tr><td> formatted file?
# NOTE(review): discover/resource/odo below come from the third-party
# odo/blaze stack; their imports (and list_game_id / targetfile) are defined
# on lines not shown in this gist preview.
sourcefile = "skeleton.csv"
# Infer the datashape (column names/types) from the skeleton csv once,
# then reuse it for every per-game file.
dshape = discover(resource(sourcefile))
# NOTE(review): the scrape flattened all indentation -- the three lines below
# are presumably the loop body / with-body in the original source.
for game_id in list_game_id:
sourcefile = "formatted_{g}.csv".format(g=game_id)
with open(sourcefile,'r') as infile:
odo(sourcefile, targetfile, dshape=dshape)
@tangotiger
tangotiger / parsePlaybyplay.py
Last active February 7, 2016 21:28
Parse the Play by Play file
from bs4 import BeautifulSoup
import re
# --- run configuration for the play-by-play parser (gist preview) ---
print("Parse start")

# Range of NHL game ids to process. The "+665" / "-2" notes in the original
# comments look like leftovers from debugging a narrower range.
first_game_id = 1
last_game_id = 668

# Season identifiers used when building source-file names
# ('02' is the NHL regular-season sub-season code -- TODO confirm).
season_id = '20152016'
subseason_id = '02'
@tangotiger
tangotiger / stripPlaybyplay.py
Last active February 7, 2016 21:15
Strip the Play by Play file so we are only left with the play records. Players On Ice not handled.
# takes 2 seconds to run per infile
# source for infile: http://www.nhl.com/scores/htmlreports/20152016/PL020666.HTM
from bs4 import BeautifulSoup
# --- run configuration for the play-by-play stripper (gist preview) ---
print("Parse start")

# Game-id range to strip; the "+665" / "-2" offsets in the original comments
# appear to be debugging leftovers.
first_game_id = 1
last_game_id = 668
@tangotiger
tangotiger / parseRoster.py
Last active February 7, 2016 21:19
Convert HTML file into csv
from bs4 import BeautifulSoup
# --- run configuration for the roster HTML-to-csv converter (gist preview) ---
print("Parse start")

# Which games to convert; "+665" / "-2" in the original comments look like
# remnants of a narrower debugging range.
first_game_id = 1
last_game_id = 668

# Pieces of the NHL report filename: season, sub-season code,
# and 'RO' for the RosterReport file type.
season_id = '20152016'
subseason_id = '02'
datafile_id = 'RO'
@tangotiger
tangotiger / stripRoster.py
Last active February 7, 2016 21:22
Strip the Roster HTML file so we are only left with the TABLE rows we need
# --- run configuration for the roster stripper (gist preview) ---
print("Parse start")

# Game-id range to strip ("+665" / "-2" in the original comments were
# apparently debugging leftovers).
first_game_id = 1
last_game_id = 668

# Filename components: sub-season code and 'RO' roster-report type.
subseason_id = '02'
datafile_id = 'RO'

# HTML fragments that mark the TABLE rows worth keeping.
searchstr1 = '#</td>'
searchstr2 = 'Pos</td>'
@tangotiger
tangotiger / parseTable.py
Last active January 24, 2016 13:19
Navigating Table with BS4
import urllib.request
from bs4 import BeautifulSoup
# Demo (gist preview, truncated): load a locally saved partial NHL roster
# report and hand it to BeautifulSoup for table navigation; the code that
# actually walks the table is not shown here.
print("Parse start")
# file:// URL pointing at a saved copy of roster report RO020666.
sourcefile = "file:///C:/Users/TOM/PycharmProjects/downloadNHL/datafiles/RO020666_partial.HTM"
html = urllib.request.urlopen(sourcefile)  # file-like response object
soup = BeautifulSoup(html, "lxml")  # requires the third-party lxml parser
@tangotiger
tangotiger / parseSchedule.py
Last active February 7, 2016 21:24
Parse Schedule
# --- configuration for extracting game ids from the saved schedule page ---
# Each NHL game id on the page follows the "recap?id=" marker and is exactly
# as long as the sample id below (e.g. 2015020001 = season+subseason+game).
print("Parse start")

sourcefile = "C:\\Users\\TOM\\PycharmProjects\\downloadNHL\\datafiles\\schedulebyseason.htm"
targetfile = "C:\\Users\\TOM\\PycharmProjects\\downloadNHL\\datafiles\\parsed_schedulebyseason.txt"

# Derive the id length from a representative record rather than hard-coding it.
sample_recstr = "2015020001"
reclen = len(sample_recstr)
searchstr = "recap?id="