Tim Abraham timabe

## map.geojson

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                timabe
                / map.geojson
            
            
              Last active
              June 21, 2020 20:29
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## statcast_columns.R
statcast_cols <- cols(
  pitch_type = col_character(),
  game_date = col_date(format = ""),
  release_speed = col_double(),
  release_pos_x = col_double(),
  release_pos_z = col_double(),
  player_name = col_character(),
  batter = col_double(),
  pitcher = col_double(),
  events = col_character(),

## yelp_hclust
# Load the data (available from github.com/timabe); it presumably provides
# `bus.cats`, which is used below.
load('busCats.Rdata')

# Summary data: just the per-category sums, ordered from largest to smallest
# ahead of plotting them.
catSums <- colSums(bus.cats)
catSums <- catSums[order(-catSums)]
plot(log(catSums))
# The plot shows a skewed set. Many of these categories will be useless in the
# hierarchical clustering because there are only a handful of observations of
# them, so they are unlikely to produce an interesting cluster membership.
# Let's concentrate on the top 60, since there's a slight kink there.

## yelp_hclust.R
# Load the data; download it first from
# https://dl.dropboxusercontent.com/u/6132890/busCats.Rdata
# It presumably provides `bus.cats`, which is used below.
load('busCats.Rdata')

# Summary data: just the per-category sums, ordered from largest to smallest
# ahead of plotting them.
catSums <- colSums(bus.cats)
catSums <- catSums[order(-catSums)]
plot(log(catSums))
# The plot shows a skewed set. Many of these categories will be useless in the
# hierarchical clustering because there are only a handful of observations of
# them, so they are unlikely to produce an interesting cluster membership.
# Let's concentrate on the top 60, since there's a slight kink there,
# and for similarity purposes we only want businesses that have multiple
# categories.

## predictOverFullGames.R
predictOverFullGames <- function(name) {
  player <- subset(pitch.test, pitcher_name == name)
  # generate predictions as votes
  player$prediction  <- predict(rf, player, type = 'vote')
  # aggregate over his individual games
  playerAgg <- aggregate(. ~ V1, data = data.frame(cbind( player$url, player$prediction)), sum)
  # melt data frame to extract winner of majority vote
  melt.player <- melt(playerAgg, id = 'V1')
  melt.player$V1 <- as.character(melt.player$V1)
  melt.player <- ddply(melt.player, .(as.character(V1)), summarize, pitcher = variable[value == max(value)])

## predictPitcherRF.R
# Build the training and testing sets from the full pitch data frame:
# 70% of the row indices are sampled for training, the rest go to testing.
train.vec2 <- sample(x = nrow(pitch), size = nrow(pitch) * 0.7)
pitch.train <- pitch[train.vec2, ]
pitch.test <- pitch[-train.vec2, ]

# Convert pitcher name and pitch type (character variables, per the original
# note) to factors in both the training and the testing set.
for (factor_col in c('pitcher_name', 'pitch_type')) {
  pitch.train[[factor_col]] <- as.factor(pitch.train[[factor_col]])
  pitch.test[[factor_col]] <- as.factor(pitch.test[[factor_col]])
}

## reduceDimensions.R
# Variables that aren't useful for the analysis.
badVars <- c(
  'des', 'on_1b', 'on_2b', 'on_3b', 'type', 'type_confidence', 'z0', 'zone',
  'url', 'num', 'count', 'pitcher', 'batter_name', 'sv_id', 'nasty'
)

# Full omit list: the variables above plus the pitcher name and pitch type.
varsToOmit <- c(badVars, 'pitcher_name', 'pitch_type')

# Let's explore this data: use 3 pitchers and see whether some dimensions of
# their fastballs can differentiate them well.

## hot_hand_ex.R
### HOT HAND
###
### EXAMPLE: Anthony Morrow

# Prerequisite: first source the code from
# https://github.com/timabe/Hot-Hand/blob/master/hot_hand.R
# streaks() and historicals() are not defined in this snippet; presumably they
# come from that script.

streaks('Anthony Morrow')
### per the original note, the result is output as a data frame to streak_count
historicals('Anthony Morrow')
### per the original note, the result is output as a list to historical.list
	statcast_cols <- cols(
	pitch_type = col_character(),
	game_date = col_date(format = ""),
	release_speed = col_double(),
	release_pos_x = col_double(),
	release_pos_z = col_double(),
	player_name = col_character(),
	batter = col_double(),
	pitcher = col_double(),
	events = col_character(),
	load('busCats.Rdata') # load this data from github.com/timabe


	catSums <- colSums(bus.cats) # get summary data, with just the category sums
	catSums[order(-catSums)]->catSums # order the categories ahead of plotting them
	plot(log(catSums))
	# the plot shows a skewed set. Many of these will be useless in the hierarchical clustering
	# as there are only a handful of observations of them. It's unlikely they will produce an
	# interesting cluster membership
	# lets concentrate on the top 60 since there's a slight kink there
	load('busCats.Rdata') # download this from https://dl.dropboxusercontent.com/u/6132890/busCats.Rdata

	catSums <- colSums(bus.cats) # get summary data, with just the category sums
	catSums[order(-catSums)]->catSums # order the categories ahead of plotting them
	plot(log(catSums))
	# the plot shows a skewed set. Many of these will be useless in the hierarchical clustering
	# as there are only a handful of observations of them. It's unlikely they will produce an
	# interesting cluster membership
	# lets concentrate on the top 60 since there's a slight kink there
	# and for similarity purposes we only want businesses that have multiple categories
	predictOverFullGames <- function(name) {
	player <- subset(pitch.test, pitcher_name == name)
	# generate predictions as votes
	player$prediction <- predict(rf, player, type = 'vote')
	# aggregate over his individual games
	playerAgg <- aggregate(. ~ V1, data = data.frame(cbind( player$url, player$prediction)), sum)
	# melt data frame to extract winner of majority vote
	melt.player <- melt(playerAgg, id = 'V1')
	melt.player$V1 <- as.character(melt.player$V1)
	melt.player <- ddply(melt.player, .(as.character(V1)), summarize, pitcher = variable[value == max(value)])
	# Generate Training and Testing data from full pitch dataframe
	train.vec2 <- sample(nrow(pitch), nrow(pitch) * 0.7)
	pitch.train <- pitch[train.vec2, ]
	pitch.test <- pitch[-train.vec2, ]

	# Change pitcher name and pitch type from character variable to factor
	pitch.train$pitcher_name <- as.factor(pitch.train$pitcher_name)
	pitch.train$pitch_type <- as.factor(pitch.train$pitch_type)
	pitch.test$pitcher_name <- as.factor(pitch.test$pitcher_name)
	pitch.test$pitch_type <- as.factor(pitch.test$pitch_type)
	# these variables aren't useful for analysis
	badVars <- c('des', 'on_1b', 'on_2b', 'on_3b', 'type', 'type_confidence',
	'z0', 'zone', 'url', 'num', 'count', 'pitcher', 'batter_name',
	'sv_id', 'nasty')

	varsToOmit = c(badVars, 'pitcher_name', 'pitch_type')

	# let's explore this data . . . use 3 pitchers and see if some dimensions of
	# their fastballs can differentiate them well.
	### HOT HAND
	###
	### EXAMPLE: Anthony Morrow

	# first source code from https://github.com/timabe/Hot-Hand/blob/master/hot_hand.R

	streaks('Anthony Morrow')
	### outputs it as data frame to streak_count
	historicals('Anthony Morrow')
	### outputs it as a list to historical.list