Skip to content

Instantly share code, notes, and snippets.

Last active June 21, 2019 14:41
Show Gist options
  • Save arempe93/66a2a0a6e0491d16df0d to your computer and use it in GitHub Desktop.
Save arempe93/66a2a0a6e0491d16df0d to your computer and use it in GitHub Desktop.
NHL Stats Scraping Information
# Shared database setup for the NHL stats scripts: connects ActiveRecord to
# the local PostgreSQL database, declares the Team and Player models, and
# offers a helper that dumps both tables to stdout.
module DatabaseHelper
  require 'active_record'
  require 'pg'

  # NOTE(review): in this copy the connection options were not wrapped in any
  # call; establish_connection is the ActiveRecord API that accepts them --
  # confirm against the original helper.
  # Credentials are read from the PG_USER / PG_PASS environment variables.
  ActiveRecord::Base.establish_connection(
    adapter: 'postgresql',
    database: 'nhlstats_development',
    username: ENV['PG_USER'],
    password: ENV['PG_PASS'],
    host: 'localhost'
  )

  # ActiveRecord model for a team row.
  class Team < ActiveRecord::Base
  end

  # ActiveRecord model for a player row.
  class Player < ActiveRecord::Base
  end

  # Prints every Team record and then every Player record to stdout.
  # Each section starts with a title and ends with the record count.
  def print_all
    puts "\nTeams\n=====\n"
    Team.all.each { |team| puts team.inspect }
    puts "\n#{Team.count} records.\n"

    puts "\nPlayers\n=======\n"
    Player.all.each { |player| puts player.inspect }
    puts "\n#{Player.count} records.\n"
  end
end
# Pulls NHL player information (name, sweater number, team id) from the
# play-by-play data of every game in the season schedule, then prints it.

# Require
require 'rubygems'
require 'json'
require 'open-uri'

# Stop loop on this game id
stopping_point = 2014020092

# Feed locations.
# NOTE(review): both URLs are blank in this copy of the script. The game ids
# start with 2014, so presumably these are the NHL game-data feeds for that
# season -- fill in and confirm. game_data_url is the prefix placed directly
# before "<game id>/PlayByPlay.json" and must end with a slash.
season_url = ''
game_data_url = ''
abort 'Set season_url and game_data_url before running' if season_url.empty? || game_data_url.empty?

# Downloads a URL and parses the body as JSON. URI#read (from open-uri) is
# used because Kernel#open no longer accepts URLs on Ruby 3.
fetch_json = ->(url) { JSON.parse(URI.parse(url).read) }

# Hashes to store information, each keyed by player id
names = {}
numbers = {}
teams = {}

# Season file
season = fetch_json.call(season_url)

# Loop through all games
season.each do |game|
  # Get game id
  id = game['id']

  # Limit loop
  break if id == stopping_point

  puts "Opening game: #{id}"

  # Open stats file
  stats = fetch_json.call("#{game_data_url}#{id}/PlayByPlay.json")
  plays = stats['data']['game']['plays']['play']

  plays.each do |play|
    # Get the player id
    player_id = play['pid']

    # Skip this play if the player doesn't exist or has been retrieved
    next unless player_id
    next if names.key?(player_id)

    # Get player information
    names[player_id] = play['playername']
    numbers[player_id] = play['sweater']
    teams[player_id] = play['teamid']
  end
end

# Print out findings
names.each do |player_id, name|
  puts "#{player_id} | #{name} | #{numbers[player_id]} | #{teams[player_id]}"
end

puts "Found #{names.count} players"
# Pulls NHL team information (id and full name per abbreviation) from the
# play-by-play data of games in the season schedule, then prints it.

# Require
require 'rubygems'
require 'json'
require 'open-uri'

# Stop loop on this game id
stopping_point = 2014020018

# Feed locations.
# NOTE(review): both URLs are blank in this copy of the script. The game ids
# start with 2014, so presumably these are the NHL game-data feeds for that
# season -- fill in and confirm. game_data_url is the prefix placed directly
# before "<game id>/PlayByPlay.json" and must end with a slash.
season_url = ''
game_data_url = ''
abort 'Set season_url and game_data_url before running' if season_url.empty? || game_data_url.empty?

# Downloads a URL and parses the body as JSON. URI#read (from open-uri) is
# used because Kernel#open no longer accepts URLs on Ruby 3.
fetch_json = ->(url) { JSON.parse(URI.parse(url).read) }

# Hashes to store information, each keyed by team abbreviation
ids = {}
names = {}

# Season file
season = fetch_json.call(season_url)

# Loop through all games
season.each do |game|
  # Get game id
  id = game['id']

  # Limit loop
  break if id == stopping_point

  puts "Opening game: #{id}"

  # Get team abbreviations
  home_team_abbv = game['h']
  away_team_abbv = game['a']

  # Skip this game if both teams have already been scraped
  next if ids.key?(home_team_abbv) && ids.key?(away_team_abbv)

  # Open stats file
  stats = fetch_json.call("#{game_data_url}#{id}/PlayByPlay.json")
  game_stats = stats['data']['game']

  # Get and store team information
  ids[home_team_abbv] = game_stats['hometeamid']
  names[home_team_abbv] = game_stats['hometeamname']

  ids[away_team_abbv] = game_stats['awayteamid']
  names[away_team_abbv] = game_stats['awayteamname']
end

# Print out findings
ids.each do |abbv, team_id|
  puts "|#{abbv}|\t|#{names[abbv]}|\t\t|#{team_id}|"
end
# Pulls NHL team and player information and stores it in a database

# Require
require 'rubygems'
require 'json'
require 'open-uri'

# Include database code
require_relative 'database_helper'
include DatabaseHelper

# Drop current tables
# NOTE(review): the statement(s) that followed this comment are missing from
# this copy, so nothing is dropped here -- restore them if a clean start is
# intended.

# Stop loop on this game id
stopping_point = 2014020117

# Feed locations.
# NOTE(review): both URLs are blank in this copy of the script. The game ids
# start with 2014, so presumably these are the NHL game-data feeds for that
# season -- fill in and confirm. game_data_url is the prefix placed directly
# before "<game id>/PlayByPlay.json" and must end with a slash.
season_url = ''
game_data_url = ''
abort 'Set season_url and game_data_url before running' if season_url.empty? || game_data_url.empty?

# Downloads a URL and parses the body as JSON. URI#read (from open-uri) is
# used because Kernel#open no longer accepts URLs on Ruby 3.
fetch_json = ->(url) { JSON.parse(URI.parse(url).read) }

# Creates a Team row unless one with this nhl_id already exists.
# The last word of the full name is stored as the team name and the words
# before it as the city.
# NOTE(review): this split puts part of a multi-word nickname into the city
# column (e.g. a three-word name yields a two-word city) -- confirm whether
# that is acceptable.
store_team = lambda do |nhl_id, full_name, abbv|
  unless Team.find_by(nhl_id: nhl_id)
    words = full_name.split(' ')
    Team.create(nhl_id: nhl_id, city: words[0...-1].join(' '), name: words.last, abbv: abbv)
  end
end

# Season file
season = fetch_json.call(season_url)

# Loop through all games
season.each do |game|
  # Get game id
  id = game['id']

  # Limit loop
  break if id == stopping_point

  puts "Opening game: #{id}"

  # Get team abbreviations
  home_team_abbv = game['h']
  away_team_abbv = game['a']

  # Open stats file
  stats = fetch_json.call("#{game_data_url}#{id}/PlayByPlay.json")['data']['game']

  # Get team ids
  home_team_id = stats['hometeamid']
  away_team_id = stats['awayteamid']

  # Store each team unless it has already been scraped
  store_team.call(home_team_id, stats['hometeamname'], home_team_abbv)
  store_team.call(away_team_id, stats['awayteamname'], away_team_abbv)

  # Loop through all game plays
  stats['plays']['play'].each do |play|
    # Get goalie information if possible
    if play['type'] == 'Shot'
      goalie_id = play['pid2']

      # Store the goalie only when one is listed and not stored yet. This is
      # a plain conditional rather than `next`, so that the shooter on the
      # same play is still processed below.
      if goalie_id && !Player.find_by(nhl_id: goalie_id)
        # The goalie plays for the team opposite the one credited with the shot
        goalie_team_id = play['teamid'] == home_team_id ? away_team_id : home_team_id
        Player.create(nhl_id: goalie_id, team_id: goalie_team_id, name: play['p2name'], player_type: 'G')
      end
    end

    # Get the player id
    player_id = play['pid']

    # Skip this play if the player doesn't exist or has been retrieved
    next unless player_id
    next if Player.find_by(nhl_id: player_id)

    # Also skip penalties with a 3rd man (Goalie penalty)
    next if play['type'] == 'Penalty' && play['pid3']

    # Get player information
    Player.create(nhl_id: player_id, team_id: play['teamid'], name: play['playername'], sweater: play['sweater'], player_type: 'S')
  end
end

NHL Stats Scraping


Has detailed information on all events of the following types:

  • Shots
  • Hits
  • Goals
  • Penalties

This information includes:

  • Time of the play
  • Period
  • Score
  • Player name/number
  • Secondary players involved
  • Type
  • Players on the ice



Has final stats for every player who played in the game, including:

  • Jersey Number
  • Goals
  • Assists
  • Shots
  • PIMS
  • Time on Ice
  • Plus/Minus


This includes goalies with the following information:

  • Shots Against
  • Saves
  • Save Percentage
  • Goals Against
  • Time on Ice


Has team stat information including:

  • Blocks
  • Takeaways
  • Power Play Percentage
  • Team PIM
  • Giveaways
  • Faceoff Wins
  • Hits


Includes a shot summary by period


Includes a penalty summary by period


Includes a goal summary by period

Copy link

I'm trying to use a visualization tool to show shots and other characteristics of games, but I have no idea how to use this scraper to do that. Is there any way you could help me out here?

Thanks in advance.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment