Skip to content

Instantly share code, notes, and snippets.

@arempe93
Last active June 21, 2019 14:41
Show Gist options
  • Save arempe93/66a2a0a6e0491d16df0d to your computer and use it in GitHub Desktop.
Save arempe93/66a2a0a6e0491d16df0d to your computer and use it in GitHub Desktop.
NHL Stats Scraping Information
# Opens the PostgreSQL connection (as soon as this module body is evaluated)
# and declares the Team and Player models used for storing scraped data.
module DatabaseHelper
  require 'active_record'
  require 'pg'

  # Credentials are read from the PG_USER / PG_PASS environment variables.
  ActiveRecord::Base.establish_connection(
    host: 'localhost',
    adapter: 'postgresql',
    database: 'nhlstats_development',
    username: ENV['PG_USER'],
    password: ENV['PG_PASS']
  )

  class Team < ActiveRecord::Base; end

  class Player < ActiveRecord::Base; end

  # Prints every Team record and then every Player record to stdout,
  # each section preceded by a heading and followed by its record count.
  def print_all
    sections = { "\nTeams\n=====\n" => Team, "\nPlayers\n=======\n" => Player }
    sections.each do |heading, model|
      puts heading
      model.all.each { |record| puts record.inspect }
      puts "\n#{model.count} records.\n"
    end
  end
end
# Pulls NHL player information (name, sweater number and team id) from the
# play-by-play feed of every game up to a stopping point and prints it.
# Require
require 'rubygems'
require 'json'
require 'open-uri'

# Stop loop on this game id
stopping_point = 2014020092

# Player details keyed by NHL player id
players = {}

# Downloads a URL and parses the body as JSON.
# URI.open is used because open-uri's Kernel#open patch was deprecated in
# Ruby 2.7 and removed in 3.0; the block form closes the handle after reading.
fetch_json = ->(url) { JSON.parse(URI.open(url, &:read)) }

# Season file
season = fetch_json.call("http://live.nhl.com/GameData/SeasonSchedule-20142015.json")

# Loop through all games
season.each do |game|
  # Get game id
  id = game['id']

  # Limit loop
  break if id == stopping_point

  puts "Opening game: #{id}"

  # Open stats file
  stats = fetch_json.call("http://live.nhl.com/GameData/20142015/#{id}/PlayByPlay.json")
  plays = stats['data']['game']['plays']['play']

  plays.each do |play|
    # Get the player id
    player_id = play['pid']

    # Skip this play if the player doesn't exist or has been retrieved already
    next if !player_id || players.key?(player_id)

    # Get player information (first play seen for a player wins)
    players[player_id] = {
      name: play['playername'],
      sweater: play['sweater'],
      team: play['teamid']
    }
  end
end

# Print out findings
players.each do |id, player|
  puts "#{id} | #{player[:name]} | #{player[:sweater]} | #{player[:team]}"
end
puts "Found #{players.count} players"
# Pulls NHL team information (id and full name per abbreviation) from the
# play-by-play feed of games up to a stopping point and prints it.
# Require
require 'rubygems'
require 'json'
require 'open-uri'

# Stop loop on this game id
stopping_point = 2014020018

# Hashes to store information, both keyed by team abbreviation
ids = {}
names = {}

# Downloads a URL and parses the body as JSON.
# URI.open is used because open-uri's Kernel#open patch was deprecated in
# Ruby 2.7 and removed in 3.0; the block form closes the handle after reading.
fetch_json = ->(url) { JSON.parse(URI.open(url, &:read)) }

# Season file
season = fetch_json.call("http://live.nhl.com/GameData/SeasonSchedule-20142015.json")

# Loop through all games
season.each do |game|
  # Get game id
  id = game['id']

  # Limit loop
  break if id == stopping_point

  puts "Opening game: #{id}"

  # Get team abbreviations
  home_team_abbv = game['h']
  away_team_abbv = game['a']

  # Skip this game (and its download) if both teams have already been scraped
  next if ids.key?(home_team_abbv) && ids.key?(away_team_abbv)

  # Open stats file
  stats = fetch_json.call("http://live.nhl.com/GameData/20142015/#{id}/PlayByPlay.json")
  game_stats = stats['data']['game']

  # Get and store team information
  ids[home_team_abbv] = game_stats['hometeamid']
  names[home_team_abbv] = game_stats['hometeamname']
  ids[away_team_abbv] = game_stats['awayteamid']
  names[away_team_abbv] = game_stats['awayteamname']
end

# Print out findings
ids.each do |abbv, id|
  puts "|#{abbv}|\t|#{names[abbv]}|\t\t|#{id}|"
end
# Pulls NHL team and player information and stores it in a database
# Require
require 'rubygems'
require 'json'
require 'open-uri'

# Include database code
require_relative 'database_helper'
include DatabaseHelper

# Clear current records (delete_all removes rows; the tables themselves remain)
Team.delete_all
Player.delete_all

# Stop loop on this game id
stopping_point = 2014020117

# Downloads a URL and parses the body as JSON.
# URI.open is used because open-uri's Kernel#open patch was deprecated in
# Ruby 2.7 and removed in 3.0; the block form closes the handle after reading.
fetch_json = ->(url) { JSON.parse(URI.open(url, &:read)) }

# Season file
season = fetch_json.call("http://live.nhl.com/GameData/SeasonSchedule-20142015.json")

# Loop through all games
season.each do |game|
  # Get game id
  id = game['id']

  # Limit loop
  break if id == stopping_point

  puts "Opening game: #{id}"

  # Open stats file
  stats = fetch_json.call("http://live.nhl.com/GameData/20142015/#{id}/PlayByPlay.json")['data']['game']

  # Get team ids
  home_team_id = stats['hometeamid']
  away_team_id = stats['awayteamid']

  # Store each team that has not been scraped yet
  [
    [home_team_id, stats['hometeamname'], game['h']],
    [away_team_id, stats['awayteamname'], game['a']]
  ].each do |team_id, full_name, abbv|
    next if Team.find_by(nhl_id: team_id)

    # The last word is taken as the team name and everything before it as the city.
    # NOTE(review): multi-word team names would be split between city and name.
    *city_words, team_name = full_name.split(' ')
    Team.create(nhl_id: team_id, city: city_words.join(' '), name: team_name, abbv: abbv)
  end

  # Loop through all game plays
  stats['plays']['play'].each do |play|
    # Get goalie information if possible
    if play['type'] == 'Shot'
      goalie_id = play['pid2']

      # Only the goalie is skipped when already stored (or absent); the shooter
      # below must still be processed, so this must not `next` the whole play.
      if goalie_id && !Player.find_by(nhl_id: goalie_id)
        # The goalie plays for the team opposing the one credited with the shot
        goalie_team_id = play['teamid'] == home_team_id ? away_team_id : home_team_id
        Player.create(nhl_id: goalie_id, team_id: goalie_team_id, name: play['p2name'], player_type: 'G')
      end
    end

    # Get the player id
    player_id = play['pid']

    # Skip this play if the player doesn't exist or has been retrieved already
    next if !player_id || Player.find_by(nhl_id: player_id)

    # Also skip penalties with a 3rd man (Goalie penalty)
    next if play['type'] == 'Penalty' && play['pid3']

    # Get player information
    Player.create(nhl_id: player_id, team_id: play['teamid'], name: play['playername'],
                  sweater: play['sweater'], player_type: 'S')
  end
end

NHL Stats Scraping

PlayByPlay

Has detailed information on all events of the following types:

  • Shots
  • Hits
  • Goals
  • Penalties

This information includes:

  • Time of the play
  • Period
  • Score
  • Player name/number
  • Secondary players involved
  • Type
  • Players on the ice

GCBX

### Players

Has final stats for every player who played in the game, including:

  • Jersey Number
  • Goals
  • Assists
  • Shots
  • PIM (penalty minutes)
  • Time on Ice
  • Plus/Minus

### Goalies

Has final stats for every goalie who played in the game, including:

  • Shots Against
  • Saves
  • Save Percentage
  • Goals Against
  • Time on Ice

### Teams

Has team stat information including:

  • Blocks
  • Takeaways
  • Power Play Percentage
  • Team PIM
  • Giveaways
  • Faceoff Wins
  • Hits

### Shots

Includes a shot summary by period

### Penalties

Includes a penalty summary by period

### Goals

Includes a goal summary by period

@elliotstone
Copy link

I'm trying to use a visualization tool to show shots and other characteristics of games but I have no idea on how to use this scrubber to do that. Is there anyway to help me out here.

Thanks in advance.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment