Skip to content

Instantly share code, notes, and snippets.

@jaehyeon-kim
Last active August 29, 2015 14:13
Show Gist options
  • Save jaehyeon-kim/13de56fa59eec73f2ccd to your computer and use it in GitHub Desktop.
Save jaehyeon-kim/13de56fa59eec73f2ccd to your computer and use it in GitHub Desktop.
## set up variables
# Number of rows to simulate and number of distinct user ids to draw from.
size <- 36000
numUsers <- 4900
# roughly each user has 7 sessions: whole number of 7-user chunks in numUsers
numSessions <- numUsers %/% 7

## create data frame
# Fixed seed so the three random draws below are reproducible; the order of
# the draws (users, sessions, scores) determines the generated values.
set.seed(123457)
userIds <- sample.int(numUsers, size = size, replace = TRUE)
ssIds <- sample.int(numSessions, size = size, replace = TRUE)
scores <- sample.int(10, size = size, replace = TRUE)

# One row per simulated observation; User is stored as a factor so that it is
# treated as a grouping variable rather than as a number.
preDf <- data.frame(
  User = as.factor(userIds),
  Session = ssIds,
  Score = scores
)
## adding overall mean
# Each way below appends a MeanScore column holding the mean of Score taken
# over all rows of preDf, so the same single value appears on every row.
# Packages are loaded with library() rather than require(): require() only
# returns FALSE with a warning when a package is missing, which lets the
# script continue and fail later with a less helpful error.

# way 1 - base R
overallDf1 <- transform(preDf, MeanScore = mean(Score, na.rm = TRUE))

# way 2 - plyr
library(plyr)
# Namespace-qualified so this is plyr's mutate regardless of whether dplyr is
# already attached in the session.
overallDf2 <- plyr::mutate(preDf, MeanScore = mean(Score, na.rm = TRUE))

# way 3 - dplyr
library(dplyr)
overallDf3 <- preDf %>%
  dplyr::mutate(MeanScore = mean(Score, na.rm = TRUE))

# way 4 - data.table
library(data.table)
preDt <- data.table(preDf)
setkey(preDt, User)
# The length-one mean is recycled against the full-length columns.
overallDt <- preDt[, list(
  User = User,
  Session = Session,
  Score = Score,
  MeanScore = mean(Score, na.rm = TRUE)
)]

# way 5 - personal preference
# dplyr verb applied directly to the keyed data.table.
overallDf4 <- preDt %>%
  dplyr::mutate(MeanScore = mean(Score, na.rm = TRUE))
## adding mean by user id
# Each way below appends a MeanScore column holding the mean of Score within
# each User, so every row of a user carries that user's mean.
# Packages are loaded with library() rather than require(): require() only
# returns FALSE with a warning when a package is missing, which lets the
# script continue and fail later with a less helpful error.

# way 1 - not recommended
library(plyr)
# mutate is namespace-qualified: when dplyr has been attached after plyr, a
# bare `mutate` would resolve to dplyr's version instead of plyr's.
postDf1 <- ddply(
  preDf,
  .(User),
  plyr::mutate,
  MeanScore = mean(Score, na.rm = TRUE)
)

# way 2
library(dplyr)
# NOTE: the result is still grouped by User; call ungroup() on it before
# doing operations that should not be applied per user.
postDf2 <- preDf %>%
  dplyr::group_by(User) %>%
  dplyr::mutate(MeanScore = mean(Score, na.rm = TRUE)) %>%
  dplyr::arrange(User)

# way 3
library(data.table)
# Rebuilt here so this section does not depend on an earlier preDt.
preDt <- data.table(preDf)
setkey(preDt, User)
# Within each User group the length-one mean is recycled against the group's
# Session and Score values.
postDt <- preDt[, list(
  Session = Session,
  Score = Score,
  MeanScore = mean(Score, na.rm = TRUE)
), by = User]

# way 4 - personal preference
# Same dplyr pipeline as way 2, applied to the keyed data.table; the result
# is likewise left grouped by User.
postDf3 <- preDt %>%
  dplyr::group_by(User) %>%
  dplyr::mutate(MeanScore = mean(Score, na.rm = TRUE)) %>%
  dplyr::arrange(User)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment