Skip to content

Instantly share code, notes, and snippets.

@gljeremy
Created October 2, 2011 23:11
Show Gist options
  • Save gljeremy/1258091 to your computer and use it in GitHub Desktop.
Save gljeremy/1258091 to your computer and use it in GitHub Desktop.
Scrapes the FIFA web site for stats that feed a fantasy league.
require 'rubygems'
require 'mechanize'
require 'uri'
require 'csv'
# Scrapes per-player statistics from the FIFA web site and appends them, one
# CSV line per player, to a caller-supplied output stream.
class FifaScrape
  # Written when a statistics page has no value at the expected position.
  NO_DATA = "ND"
  # Written into columns this scraper never fills in.
  NOT_AVAILABLE = "na"
  # Written into columns that are left to be filled in later.
  TO_BE_DETERMINED = "tbd"
  # Number of cells in every generated row (player name plus 27 score columns).
  ROW_WIDTH = 28
  # Prefix of the per-player statistics pages; the FIFA player id is appended.
  STATS_URL_PREFIX = "http://www.fifa.com/worldcup/statistics/players/player="
  # XPath of the table cells that hold the statistic values.
  VALUE_CELLS = ".//td[@class='r val']"

  attr_reader :baseUrl, :mech

  # url   - player search URL; the (encoded) player name is appended to it.
  # proxy - proxy host name to use on port 80, or nil for a direct connection.
  def initialize(url, proxy)
    @mech = Mechanize.new
    @baseUrl = url
    @mech.set_proxy(proxy, 80, nil, nil) unless proxy.nil?
  end

  # Looks the player up, fetches the main and passes statistics pages and
  # appends one CSV line for the player to csvRow.
  #
  # playerName - the player's name; surrounding whitespace (such as the
  #              trailing newline left by IO#readlines) is ignored.
  # csvRow     - output stream responding to << and flush.
  #
  # Blank names are ignored. A player without a search result is reported on
  # stdout and skipped, so one bad name does not abort a whole batch.
  def scrapePlayer(playerName, csvRow)
    name = playerName.to_s.strip
    return if name.empty?

    puts "Fetching player stats for " + name
    player_id = find_player_id(name)
    if player_id.nil?
      puts "No FIFA player page found for " + name + ", skipping"
      return
    end
    puts "Found FIFA player Id: " + player_id

    stats_url = STATS_URL_PREFIX + player_id
    main_stats = fetch_values(stats_url + "/mainstatistics.html")
    pass_stats = fetch_values(stats_url + "/passes.html")

    row = build_row(name, main_stats, pass_stats)
    # generate_line may or may not append a row separator depending on the
    # Ruby version, so drop any newline and terminate the line explicitly.
    # The non-bang String#delete never returns nil, unlike gsub!.
    csvRow << CSV.generate_line(row).delete("\n") << "\n"
    csvRow.flush
  end

  private

  # Searches for the player and returns the FIFA player id as a String, or
  # nil when the search page has no link containing "player=".
  def find_player_id(name)
    # Every space has to be encoded, not just the first one.
    search_page = @mech.get(@baseUrl + name.tr(' ', '+'))
    link = search_page.links_with(:href => /player=/).first
    return nil if link.nil?

    puts "Found player page: " + link.href
    puts "Parsing FIFA player ID"
    # Take whatever follows "player=" up to the next slash, wherever in the
    # URL that segment sits.
    link.href[/player=([^\/]+)/, 1]
  end

  # Fetches url and returns the whitespace-separated tokens of the combined
  # text of all value cells on that page.
  def fetch_values(url)
    @mech.get(url).search(VALUE_CELLS).inner_text.split
  end

  # Builds the row array for one player. The column/statistic tables are
  # created here rather than as class-level constants because Scores and the
  # Fifa*StatIndexes classes are defined after this class in the file.
  def build_row(name, main_stats, pass_stats)
    row = Array.new(ROW_WIDTH)
    row[Scores::PlayerName] = name

    [Scores::A, Scores::CW, Scores::LST, Scores::MW, Scores::OG, Scores::PI,
     Scores::PK, Scores::PKM, Scores::PKS, Scores::SPG, Scores::TW].each do |column|
      row[column] = NOT_AVAILABLE
    end

    [Scores::CS, Scores::GA, Scores::HT, Scores::L, Scores::T, Scores::W].each do |column|
      row[column] = TO_BE_DETERMINED
    end

    # Score column => position of its value on the main statistics page.
    {
      Scores::BS  => FifaMainStatIndexes::Shotsblocked,
      Scores::FC  => FifaMainStatIndexes::FoulsCommitted,
      Scores::FW  => FifaMainStatIndexes::FoulsSuffered,
      Scores::G   => FifaMainStatIndexes::Goalsscored,
      Scores::PC  => FifaMainStatIndexes::Penalties,
      Scores::RC  => FifaMainStatIndexes::RedCards,
      Scores::SV  => FifaMainStatIndexes::Saves,
      Scores::SOT => FifaMainStatIndexes::Shotsongoal,
      Scores::YC  => FifaMainStatIndexes::Yellowcards
    }.each do |column, stat_index|
      row[column] = main_stats[stat_index] || NO_DATA
    end

    row[Scores::SC] = pass_stats[FifaPassesStatIndexes::CrossesCompleted] || NO_DATA
    row
  end
end
# Zero-based column positions of the row array that FifaScrape#scrapePlayer
# fills in. The player's name occupies column 0; the score columns follow:
#
#   PlayerName A BS CS CW FC FW G GA HT LST L  MW OG PI PC PK PKM PKS RC SV SPG SOT SC TW T  W  YC
#   0          1 2  3  4  5  6  7 8  9  10  11 12 13 14 15 16 17  18  19 20 21  22  23 24 25 26 27
class Scores
  %w[
    PlayerName A BS CS CW FC FW G GA HT LST L MW OG PI PC PK PKM PKS RC SV
    SPG SOT SC TW T W YC
  ].each_with_index do |column, position|
    # Each listed name becomes a constant whose value is its place in the list.
    const_set(column, position)
  end
end
# Positions of the individual statistics within the whitespace-split list of
# values that FifaScrape#scrapePlayer reads from a player's
# mainstatistics.html page. Related statistics are assigned together.
class FifaMainStatIndexes
  # Appearances
  MatchesPlayed, Minutesplayed = 0, 1

  # Goals, by period of play
  Goalsscored, Goalsduring1sthalf, Goalsduring2ndhalf,
    Goalsduring1stextratime, Goalsduring2ndextratime = 2, 3, 4, 5, 6

  # Penalties and goals by area
  Penalties, GoalsScoredinPenaltyArea, GoalsScoredfromOutsidePenaltyArea = 7, 8, 9

  # Fouls
  FoulsCommitted, FoulsSuffered = 10, 11

  # Cards
  Yellowcards, Secondyellowcardandredcard, RedCards = 12, 13, 14

  # Shots: total, on goal, wide and blocked, each followed by its
  # inside / outside penalty area breakdown
  TotalShots, ShotsFromPenaltyArea, ShotsOutsidePenaltyArea = 15, 16, 17
  Shotsongoal, ShotsongoalfromPenaltyArea, ShotsongoalfromoutsidePenaltyArea = 18, 19, 20
  ShotsWide, ShotswidefromPenaltyArea, ShotswidefromOutsidePenaltyArea = 21, 22, 23
  Shotsblocked, Blockedshotsfrominsidethepenaltyarea, BlockedShotsfromoutsidePenaltyArea = 24, 25, 26

  # Remaining values
  Offsides, Saves = 27, 28
end
# Positions of individual values within the whitespace-split list that
# FifaScrape#scrapePlayer reads from a player's passes.html page.
class FifaPassesStatIndexes
# Position of the value written into the Scores::SC column.
CrossesCompleted = 4
end
# Script entry point: reads player names (one per line) from playerlist.txt,
# scrapes each player's statistics and writes one line per player to
# PlayerStats.csv.
baseUrl = "http://www.fifa.com/worldcup/players/index.htmx?pn="
# Host name of a proxy to use on port 80, or nil for a direct connection.
proxy = nil

# File.readlines opens and closes the file itself, so no handle is left open.
# Trailing newlines are stripped and blank lines dropped so that neither ends
# up in the search URL or in the CSV output.
playerList = File.readlines("playerlist.txt").map { |line| line.strip }.reject { |name| name.empty? }

fifa = FifaScrape.new(baseUrl, proxy)
# The block form closes the output file even if scraping raises part-way.
File.open("PlayerStats.csv", "wb") do |csvRow|
  playerList.each do |playerName|
    fifa.scrapePlayer(playerName, csvRow)
  end
end
@zSynctic
Copy link

zSynctic commented Oct 5, 2020

It says that there is an error in the first line.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment