Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# dplyr testing and goofing
#
# setwd("D:/R_the software/datatrials/Lahman")
#
require(Lahman)
require(dplyr)
#
# throwing by position
#
# Build the player/fielding table two ways -- base merge() and dplyr's
# inner_join() -- and time each one so the two can be compared.
# Each join runs exactly once: the assignment sits inside system.time(),
# which evaluates its argument in the calling frame, so MasterFielding is
# still created at top level.
# NOTE(review): newer Lahman releases appear to name this table People
# rather than Master -- confirm against the installed version.
#
# version 1 - "merge"
system.time(MasterFielding <- merge(Master, Fielding, by = "playerID"))
# version 2 - dplyr (expected to be faster; compare the two timings).
# This result overwrites the merge() one and is what stays in MasterFielding.
system.time(MasterFielding <- inner_join(Fielding, Master, by = "playerID"))
#
#
# a count of games played, by position
#
# Keep seasons after 1944 and drop the "OF" summary records.
# yearID is compared with a number: comparing it with the string "1944"
# would coerce the years to text and compare them as strings.
# (assumes yearID is stored as a number, as in the Lahman tables -- confirm)
MasterFielding <- subset(MasterFielding, POS != "OF" & yearID > 1944)
#
# Five largest game totals per player / position / throwing hand.
# summarise() leaves the result grouped, so the grouping is dropped before
# sorting; that way arrange() and head(5) act on the whole table.
MasterFielding %>%
  group_by(playerID, POS, throws) %>%
  summarise(gamecount = sum(G)) %>%
  ungroup() %>%
  arrange(desc(gamecount)) %>%
  head(5)
#
# Rebuild the joined table from scratch for this section.
MasterFielding <- inner_join(Fielding, Master, by = "playerID")
# select only those seasons since 1945 and
# omit the records that are OF summary (i.e. leave the RF, CF, and LF)
# yearID is compared with a number rather than the string "1944", which
# would force a string comparison.
MasterFielding <- subset(MasterFielding, POS != "OF" & yearID > 1944)
#
# Player_games: total games for each player / position / throwing hand.
Player_games <- MasterFielding %>%
  group_by(playerID, POS, throws) %>%
  summarise(gamecount = sum(G))
# Player_POS: number of Player_games rows (one per player at that position)
# for each position / throwing-hand combination.
Player_POS <- Player_games %>%
  group_by(POS, throws) %>%
  summarise(playercount = n())
head(Player_POS)
#
#
#
# ################################
#
# from dplyr reference manual
# http://cran.r-project.org/web/packages/dplyr/index.html
require(dplyr)
#
# tally() examples; run only when the Lahman package can be loaded.
# Inside the braces only the value of the last expression is auto-printed
# at top level; the earlier tally() calls are evaluated but not shown.
if (require("Lahman")) {
  batting_tbl <- as_tibble(Batting)
  tally(group_by(batting_tbl, yearID))
  tally(group_by(batting_tbl, yearID), sort = TRUE)
  # Multiple tallys progressively roll up the groups
  plays_by_year <- tally(group_by(batting_tbl, playerID, stint), sort = TRUE)
  tally(plays_by_year, sort = TRUE)
  tally(tally(plays_by_year))
  # This looks a little nicer if you use the %>% pipe
  batting_tbl %>%
    group_by(playerID) %>%
    tally(sort = TRUE)
}
#
#
# ########################################
# DPLYR experimentation
#
require(Lahman)
require(vcd)
require(dplyr)
#
# Per-player row counts from the Fielding table.
# PlayerFielding: Fielding grouped by playerID.
PlayerFielding <- Fielding %>% group_by(playerID)
# PlayerSeasons: one row per player; total = number of Fielding rows
# recorded for that player.
PlayerSeasons <- PlayerFielding %>% summarise(total = n())
head(PlayerSeasons)
#
#
# Five players with the largest total of games in the Batting table.
# Grouping is by playerID only, so summarise() returns an ungrouped table
# and the sort applies to all rows.
Batting %>%
  group_by(playerID) %>%
  summarise(total = sum(G)) %>%
  arrange(desc(total)) %>%
  head(5)
#
# Five largest game totals per player / position in the Fielding table.
# summarise() leaves the result grouped by playerID, so the grouping is
# dropped before sorting; arrange() and head(5) then act on the whole table.
Fielding %>%
  group_by(playerID, POS) %>%
  summarise(total = sum(G)) %>%
  ungroup() %>%
  arrange(desc(total)) %>%
  head(5)
#
# PlayerGames: total games per player / position, largest totals first.
# The grouping left behind by summarise() is dropped so that the sort is
# over the whole table and the stored result is a plain, ungrouped table.
PlayerGames <- Fielding %>%
  group_by(playerID, POS) %>%
  summarise(total = sum(G)) %>%
  ungroup() %>%
  arrange(desc(total))
head(PlayerGames)
#
# create MasterFielding using dplyr
MasterFielding <- inner_join(Fielding, Master, by = "playerID")
#
# Reduce MasterFielding to (a) seasons after 1945 and (b) the variables
# needed below: player, season, position, games and throwing hand.
# NOTE(review): the sections above keep seasons *since* 1945
# (yearID > 1944), whereas this filter excludes 1945 -- confirm which
# cut-off is intended.
MasterFielding45 <- MasterFielding %>%
  filter(yearID > 1945) %>%
  select(playerID, yearID, POS, G, throws)
head(MasterFielding45)
#
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.