#######################################################################################
# Function to scrape season skater statistics from Hockey-reference.com
#######################################################################################
GrabSkaters <- function(S) {

  # Scrape one season of skater statistics from Hockey-reference.com.
  #
  # Args:
  #   S: the season; it is pasted verbatim into the page URL
  #      (".../leagues/NHL_<S>_skaters.html") and stored in the `season`
  #      column of the result.
  #
  # Returns:
  #   A data frame with lower-case column names. Columns 11 and 18 are
  #   renamed to "plusmin" and "spct", columns 1, 3 and 6:18 are numeric,
  #   every other scraped column is character, rows whose `rk` is NA are
  #   dropped, and a `season` column holding S is appended.
  #
  # Raises:
  #   An error if the XML package is not installed, if the page has no table
  #   named "stats", or if that table has fewer columns than expected.

  # library() fails right away when XML is missing; require() would only
  # return FALSE and the failure would surface later as a missing function.
  library(XML)

  ## create the URL
  URL <- paste0("http://www.hockey-reference.com/leagues/NHL_",
                S, "_skaters.html")

  ## grab the page and pick the table named "stats" out of the parsed list
  tables <- readHTMLTable(URL)
  ds.skaters <- tables$stats
  if (is.null(ds.skaters)) {
    stop("No table named 'stats' found at ", URL, call. = FALSE)
  }

  ## column positions this function relies on
  num.cols <- c(1, 3, 6:18)
  plusmin.col <- 11
  spct.col <- 18
  if (ncol(ds.skaters) < max(num.cols)) {
    stop("Expected at least ", max(num.cols), " columns in the 'stats' ",
         "table but found ", ncol(ds.skaters), call. = FALSE)
  }

  ## I don't like dealing with factors if I don't have to
  ## and I prefer lower case
  ds.skaters[] <- lapply(ds.skaters, as.character)
  names(ds.skaters) <- tolower(names(ds.skaters))

  ## fix a couple of the column names
  ## NOTE(review): the original kept a commented-out alternative that renamed
  ## column 10 instead of 11 -- presumably for a different table layout;
  ## confirm the positions against the seasons being scraped.
  names(ds.skaters)[plusmin.col] <- "plusmin"
  names(ds.skaters)[spct.col] <- "spct"

  ## finally fix the column types; non-numeric cells (e.g. repeated header
  ## rows) become NA, so "NAs introduced by coercion" warnings are expected
  ds.skaters[num.cols] <- lapply(ds.skaters[num.cols], as.numeric)

  ## convert toi to seconds, and seconds/game (disabled in the original)
  ## ds.skaters$seconds <- (ds.skaters$toi*60)/ds.skaters$gp

  ## remove the rows whose rank did not parse as a number
  ## (the header and totals rows)
  ds.skaters <- ds.skaters[!is.na(ds.skaters$rk), ]
  ## ds.skaters <- ds.skaters[ds.skaters$tm != "TOT", ]

  ## add the year
  ds.skaters$season <- S

  ## return the dataframe
  ds.skaters

}