Last active
August 29, 2015 14:04
-
-
Save MonkmanMH/3c0da6afd58eb61e2c51 to your computer and use it in GitHub Desktop.
dplyr testing and goofing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# setwd("D:/R_the software/datatrials/Lahman") | |
# | |
# Load the packages with library() so that a missing package stops the script
# immediately instead of require() returning FALSE and failing later on.
library(Lahman)
library(dplyr)
#
# throwing by position
# version 1 - base R merge(), timed for comparison with the dplyr join below.
# merge() already returns a data frame, so no data.frame() wrapper is needed,
# and the assignment inside system.time() leaves MasterFielding in the
# workspace, so each join only has to be run (and timed) once.
system.time(MasterFielding <- merge(Master, Fielding, by = "playerID"))
# version 2 - the dplyr version (noted by the original author as faster)
system.time(MasterFielding <- inner_join(Fielding, Master, by = "playerID"))
# | |
# | |
# a count of games played, by position
# Keep the seasons after 1944 and drop the "OF" rows. yearID is compared with
# a number rather than the string "1944", so the test is a numeric comparison
# and not a character (lexicographic) one.
MasterFielding <- subset(MasterFielding, POS != "OF" & yearID > 1944)
#
# Top five player / position / throwing-hand combinations by total games.
# %>% is used because dplyr no longer provides the %.% operator.
MasterFielding %>%
  group_by(playerID, POS, throws) %>%
  summarise(gamecount = sum(G)) %>%
  ungroup() %>% # sort the whole table, not rows within leftover groups
  arrange(desc(gamecount)) %>%
  head(5)
# | |
# Rebuild the joined table from scratch so this section does not depend on
# earlier filtering of MasterFielding.
MasterFielding <- inner_join(Fielding, Master, by = "playerID")
# select only those seasons since 1945 and
# omit the records that are OF summary (i.e. leave the RF, CF, and LF);
# the year is compared as a number, not as the string "1944"
MasterFielding <- subset(MasterFielding, POS != "OF" & yearID > 1944)
#
# Total games for each player at each position, keeping the throwing hand.
# %>% is used because dplyr no longer provides the %.% operator.
Player_games <- MasterFielding %>%
  group_by(playerID, POS, throws) %>%
  summarise(gamecount = sum(G))
# Number of players for each position / throwing-hand combination;
# n() counts the rows in each group (same result as length(gamecount)).
Player_POS <- Player_games %>%
  group_by(POS, throws) %>%
  summarise(playercount = n())
head(Player_POS)
# | |
# | |
# | |
# ################################ | |
# | |
# from dplyr reference manual | |
# http://cran.r-project.org/web/packages/dplyr/index.html | |
library(dplyr)
#
# The example only runs when the Lahman package can be loaded; require() is
# used here for its TRUE/FALSE return value.
if (require("Lahman")) {
  # as_tibble() replaces the deprecated tbl_df()
  batting_tbl <- as_tibble(Batting)
  tally(group_by(batting_tbl, yearID))
  tally(group_by(batting_tbl, yearID), sort = TRUE)
  # Multiple tallys progressively roll up the groups
  plays_by_year <- tally(group_by(batting_tbl, playerID, stint), sort = TRUE)
  tally(plays_by_year, sort = TRUE)
  tally(tally(plays_by_year))
  # This looks a little nicer if you use the pipe; %>% replaces the %.%
  # operator, which dplyr no longer provides
  batting_tbl %>%
    group_by(playerID) %>%
    tally(sort = TRUE)
}
# | |
# | |
# ######################################## | |
# DPLYR experimentation | |
# | |
require(Lahman) | |
require(vcd) | |
require(dplyr) | |
# | |
# Group the Fielding table by player, then count how many Fielding rows each
# player has; the grouped table is kept as PlayerFielding for later use.
PlayerFielding <- Fielding %>%
  group_by(playerID)
PlayerSeasons <- PlayerFielding %>%
  summarise(total = n())
head(PlayerSeasons)
# | |
# | |
# Five players with the most total games in the Batting table.
# %>% is used because dplyr no longer provides the %.% operator.
Batting %>%
  group_by(playerID) %>%
  summarise(total = sum(G)) %>%
  arrange(desc(total)) %>%
  head(5)
#
# Five player / position combinations with the most total games in the
# Fielding table. summarise() leaves the result grouped by playerID, so it is
# ungrouped before sorting to rank across the whole table.
Fielding %>%
  group_by(playerID, POS) %>%
  summarise(total = sum(G)) %>%
  ungroup() %>%
  arrange(desc(total)) %>%
  head(5)
# | |
# Total games per player and position, stored as an ungrouped table sorted
# from most to fewest games. %>% is used because dplyr no longer provides the
# %.% operator; ungroup() makes the sort apply to the whole table.
PlayerGames <- Fielding %>%
  group_by(playerID, POS) %>%
  summarise(total = sum(G)) %>%
  ungroup() %>%
  arrange(desc(total))
head(PlayerGames)
# | |
# create MasterFielding using dplyr
MasterFielding <- inner_join(Fielding, Master, by = "playerID")
#
# summarize MasterFielding by (a) filter only years > 1945 and
# (b) select specific variables.
# %>% is used because dplyr no longer provides the %.% operator.
# NOTE(review): this cutoff excludes the 1945 season, whereas the earlier
# sections of this script use `yearID > 1944` -- confirm which is intended.
MasterFielding45 <- MasterFielding %>%
  filter(yearID > 1945) %>%
  select(playerID, yearID, POS, G, throws)
head(MasterFielding45)
# | |
# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment