Skip to content

Instantly share code, notes, and snippets.

@NickTalavera
Last active December 1, 2016 15:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NickTalavera/b13d9d56d2473c2ffbc816cf43c93901 to your computer and use it in GitHub Desktop.
Save NickTalavera/b13d9d56d2473c2ffbc816cf43c93901 to your computer and use it in GitHub Desktop.
Steam Sales Blog
#' Merge two data frames on a loosely normalised version of their `Name` column.
#'
#' Each input gets a temporary `NameModded` key derived from `Name`: the name
#' is lower-cased, its first run of digits is rewritten as a (lower-case)
#' roman numeral, and a list of filler words / non-alphanumeric characters is
#' stripped out. The inputs are merged on that key, a single `Name` column is
#' kept, and the result is passed through `VarDrop()`, `gameRemover()` and
#' `keepLargestDuplicate()`, which are defined outside this block.
#'
#' Requires `foreach` (with `%dopar%`) to be attached, as the original did.
#'
#' @param dataX,dataY Data frames that each contain a `Name` column.
#' @param mergeType One of "all", "all.x" or "all.y" (case-insensitive);
#'   selects which unmatched rows `merge()` keeps.
#' @param keepName "x" (case-insensitive) prefers the name coming from
#'   `dataX`; any other value prefers the name coming from `dataY`.
#' @return The merged data frame as returned by `keepLargestDuplicate()`.
generousNameMerger <- function(dataX, dataY, mergeType = "all", keepName = "x") {
  # Fail early with a clear message; previously an unknown mergeType skipped
  # every merge branch and crashed later with an unrelated error.
  mergeKind <- tolower(mergeType)
  if (length(mergeKind) != 1L || !mergeKind %in% c("all", "all.x", "all.y")) {
    stop("mergeType must be one of \"all\", \"all.x\" or \"all.y\"",
         call. = FALSE)
  }

  dataList <- list(dataX, dataY)
  datasWNameModded <- foreach(i = seq_along(dataList)) %dopar% {
    datasOut <- dataList[[i]]
    datasOut$Name <- as.character(datasOut$Name)
    datasOut$NameModded <- tolower(datasOut$Name)

    # Rewrite the first number in each name as a roman numeral so that
    # "Title 2" and "Title II" end up with the same key. The numeral is
    # lower-cased because the rest of the key is lower-case and merge()
    # compares keys case-sensitively.
    firstNumber <- as.integer(stringr::str_trim(
      stringr::str_extract(datasOut$NameModded, pattern = "[0-9]+")
    ))
    romanNumber <- as.character(as.roman(firstNumber))
    hasRoman <- !is.na(romanNumber)
    datasOut$NameModded[hasRoman] <- stringr::str_replace(
      datasOut$NameModded[hasRoman],
      pattern = "[0-9]+",
      replacement = tolower(romanNumber[hasRoman])
    )

    # Patterns are applied in this order as regular expressions; the first
    # one already removes every non-alphanumeric character.
    removeWords <- tolower(c(
      "[^a-zA-Z0-9]", " ", "Remastered", "Videogame", "WWE", "EA*SPORTS",
      "Soccer", "&", "™", "®", "DVD$", "of", "DX", "disney", "Deluxe",
      "Complete", "Ultimate", "Encore", "definitive", "for", "edition",
      "standard", "special", "game", "the", "Gold", "Legendary", "Base*Game",
      "free*to*play", "full*game", "year", "hd", "movie", "TM", "Cabela\"s",
      "and", " x$", "s$"
    ))
    for (wordPattern in removeWords) {
      datasOut$NameModded <- gsub(wordPattern, "", datasOut$NameModded,
                                  ignore.case = TRUE)
    }

    # If stripping left nothing, fall back to the original name of the SAME
    # row (the index is applied on both sides; which() also skips NA keys).
    emptyKey <- which(datasOut$NameModded == "")
    datasOut$NameModded[emptyKey] <- datasOut$Name[emptyKey]
    datasOut
  }
  dataX <- datasWNameModded[[1]]
  dataY <- datasWNameModded[[2]]

  merged <- merge(
    x = dataX, y = dataY, by = "NameModded",
    all.x = mergeKind %in% c("all", "all.x"),
    all.y = mergeKind %in% c("all", "all.y")
  )

  # Keep one display name, filling gaps from the other side of the merge.
  if (tolower(keepName) == "x") {
    missingName <- is.na(merged$Name.x)
    merged$Name.x[missingName] <- merged$Name.y[missingName]
    merged$Name <- merged$Name.x
  } else {
    missingName <- is.na(merged$Name.y)
    merged$Name.y[missingName] <- merged$Name.x[missingName]
    merged$Name <- merged$Name.y
  }

  # Helpers below live elsewhere in the project; their exact behaviour is not
  # visible from this block.
  merged <- VarDrop(merged, c("Name.y", "Name.x", "NameModded"))
  merged <- gameRemover(merged)
  keepLargestDuplicate(merged)
}
#' Load and clean the SteamSpy summer-sale CSV export.
#'
#' Reads the export from the directory prefix `dataLocale`, turns "N/A" cells
#' into `NA`, strips currency / percent / bracket punctuation, splits the
#' combined discount and score columns, converts the numeric columns and
#' derives `Sales` as owners-after minus owners-before.
#'
#' Depends on two globals defined outside this block: `dataLocale` (path
#' prefix) and `cl` (a cluster object usable by `parallel::parLapply`).
#'
#' @return A data frame with the columns `Name`, `Price_Before_Sale`,
#'   `Increase`, `Sale_Percent`, `Price_During_Sale`,
#'   `Review_Score_Steam_Users`, `Review_Score_Metacritic` and `Sales`.
steamSpySaleCSVPreparer <- function() {
  steamSummerSaleNew <- read.csv(
    paste0(dataLocale, "Steam Summer Sale - SteamSpy - All the data and stats about Steam games.csv"),
    sep = ",", stringsAsFactors = FALSE, na.strings = c("", "NA")
  )

  # sub() with an NA replacement sets every cell containing "N/A" to NA.
  # stringsAsFactors = FALSE is spelled out because before R 4.0 data.frame()
  # converted strings to factors, and as.numeric() on a factor returns level
  # codes instead of the parsed numbers (this silently corrupted `Increase`).
  # NOTE(review): a combined cell that only partly reads "N/A" is blanked as a
  # whole by this step - confirm that is intended for the score column.
  steamSummerSaleNew <- data.frame(
    parLapply(cl = cl, steamSummerSaleNew, sub,
              pattern = "N/A", replacement = NA, fixed = TRUE),
    stringsAsFactors = FALSE
  )
  steamSummerSaleNew <- data.frame(
    parLapply(cl = cl, steamSummerSaleNew, gsub,
              pattern = "\\±|\\,|\\$|\\%|\\(|\\)", replacement = "",
              fixed = FALSE),
    stringsAsFactors = FALSE
  )

  steamSummerSaleNew <- dplyr::select(
    steamSummerSaleNew,
    "Name" = Game,
    "Price_Before_Sale" = Price,
    "Maximum_Percent_Sale_and_Minimum_Price_With_Sale" = Max.discount,
    "Owners_Before" = Owners.before,
    "Owners_After" = Owners.after,
    "Userscore_And_Metascore" = Userscore..Metascore.,
    Increase
  )

  # With the punctuation gone, each combined column is "<first> <second>".
  saleStrings <- stringr::str_split_fixed(
    steamSummerSaleNew$Maximum_Percent_Sale_and_Minimum_Price_With_Sale, " ", 2
  )
  steamSummerSaleNew$Sale_Percent <- saleStrings[, 1]
  steamSummerSaleNew$Price_During_Sale <- saleStrings[, 2]

  reviewScoreStrings <- stringr::str_split_fixed(
    steamSummerSaleNew$Userscore_And_Metascore, " ", 2
  )
  steamSummerSaleNew$Review_Score_Steam_Users <- reviewScoreStrings[, 1]
  steamSummerSaleNew$Review_Score_Metacritic <- reviewScoreStrings[, 2]

  # Owner counts contain spaces that must go before numeric conversion.
  steamSummerSaleNew$Owners_Before <- stringr::str_replace_all(
    steamSummerSaleNew$Owners_Before, " ", ""
  )
  steamSummerSaleNew$Owners_After <- stringr::str_replace_all(
    steamSummerSaleNew$Owners_After, " ", ""
  )

  columnsToNumeric <- c(
    "Sale_Percent", "Price_During_Sale", "Price_Before_Sale",
    "Review_Score_Steam_Users", "Review_Score_Metacritic",
    "Owners_Before", "Owners_After", "Increase"
  )
  # parLapply always returns one list element per column, so the assignment
  # does not depend on how parSapply would simplify its result.
  steamSummerSaleNew[columnsToNumeric] <- parLapply(
    cl = cl, steamSummerSaleNew[columnsToNumeric], as.numeric
  )

  steamSummerSaleNew$Sales <-
    steamSummerSaleNew$Owners_After - steamSummerSaleNew$Owners_Before

  # Drop the raw combined columns and the owner counts now folded into Sales.
  dplyr::select(
    steamSummerSaleNew,
    -Userscore_And_Metascore,
    -Maximum_Percent_Sale_and_Minimum_Price_With_Sale,
    -Owners_After,
    -Owners_Before
  )
}
#' Load the Metacritic review dump and prepare it for merging.
#'
#' Reads the semicolon-separated file `metacritic-20151227.csv` from the
#' directory prefix `dataLocale` (a global defined outside this block),
#' renames `title` to `Name`, filters rows by platform, drops `user_score`
#' and parses `release` as a `Date`.
#'
#' @return A data frame of reviews with a `Name` column and a `Date`-typed
#'   `release` column.
metacriticCSVPreparer <- function() {
  metacriticReviews <- read.csv(
    paste0(dataLocale, "metacritic-20151227.csv"),
    sep = ";", stringsAsFactors = FALSE, na.strings = c("", "NA")
  )

  # Qualified so the call cannot be hijacked by another attached package that
  # also exports rename() (e.g. plyr), matching dplyr::select used below.
  metacriticReviews <- dplyr::rename(metacriticReviews, "Name" = title)

  # `%!in%` is a project operator defined outside this block; presumably the
  # negation of `%in%`, i.e. these platforms are excluded - confirm.
  excludedPlatforms <- c("ios", "gba", "ds", "3ds", "psp", "vita")
  metacriticReviews <-
    metacriticReviews[metacriticReviews$platform %!in% excludedPlatforms, ]

  metacriticReviews <- dplyr::select(metacriticReviews, -user_score)

  # "%b" matches abbreviated month names in the current LC_TIME locale, so
  # values that do not parse under that locale become NA.
  metacriticReviews$release <- as.Date(
    as.character(metacriticReviews$release),
    format = "%b %d, %Y"
  )
  metacriticReviews
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment