Skip to content

Instantly share code, notes, and snippets.

# Impute the missing entries of `var` (presumably a numeric vector defined
# earlier -- confirm against the caller) in four alternative ways:
# with the observed minimum, maximum, median, or a constant zero.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the meaning of na.rm.
var1 <- ifelse(is.na(var), min(var, na.rm = TRUE), var)
var2 <- ifelse(is.na(var), max(var, na.rm = TRUE), var)
var3 <- ifelse(is.na(var), median(var, na.rm = TRUE), var)
var4 <- ifelse(is.na(var), 0, var)
# Frequency table of the categories in `var`; the empty-string category is
# dropped so it can never be chosen as the mode.
var.freq <- table(var)
var.freq <- var.freq[rownames(var.freq) != ""]
# impute with mode, most frequent category
# which.max() yields exactly one position, so a tie between categories picks
# the first of them instead of producing several modes that ifelse() would
# recycle position by position.
var.mode <- names(var.freq)[which.max(var.freq)]
var1 <- ifelse(is.na(var) | var == "", var.mode, var)
# impute with random category using sample distribution
# Drawing from the observed (non-missing, non-empty) values makes each
# category's chance proportional to its frequency.
var.filled <- var[!is.na(var) & var != ""]
# Guard the empty case explicitly: 1:length(x) would be c(1, 0) there and
# index the vector with a meaningless position.
var.random <- if (length(var.filled) > 0) {
  var.filled[sample.int(length(var.filled), 1)]
} else {
  NA
}
# Test cases for fn_puddle(), which is defined elsewhere. The trailing comment
# on each call is the expected result. The expected values are consistent with
# the total amount of water trapped between walls of the given heights --
# TODO confirm against the fn_puddle definition.
wall.test <- c(2,5,1,2,3,4,7,7,6)
fn_puddle(wall.test) # 10
wall.test <- c(2,5,1,3,1,2,1,7,7,6)
fn_puddle(wall.test) # 17
wall.test <- c(2,5,1,3,1,2,1,7,7,0,7)
fn_puddle(wall.test) # 24
## my initial attempt
#.. works on 1 big puddle, does not work for c(5,1,5,1,5)
fn_1puddle <- function(walls) {
# custom tryCatch to return result and warnings -- http://stackoverflow.com/a/24569739/2271856
myTryCatch <- function(expr) {
warn <- err <- NULL
value <- withCallingHandlers(
tryCatch(expr, error=function(e) {
err <<- e
NULL
}), warning=function(w) {
warn <<- w
invokeRestart("muffleWarning")
# Encode the missingness pattern of `myrow` as a single string with one
# character per element: "1" where the element is NA, "0" where it is not.
fn_NApattern <- function(myrow) {
  na_flags <- as.integer(is.na(myrow))
  paste(na_flags, collapse = "")
}
# dummy data with NAs scattered over four numeric columns, used to test the
# function; the outer parentheses make the assigned value visible so it is
# printed at the console
(df_dummy <- data.frame(
  X1 = c(5, NA, 3, NA, 3, 2, 5, 2),
  X2 = c(NA, 0, 3, NA, 3, 1, NA, 2),
  X3 = c(1, 5, NA, NA, 6, 1, NA, 2),
  X4 = c(NA, NA, 3, 3, 3, 1, NA, NA)
))
# X1 X2 X3 X4
# returns A-Z or AA-ZZ for up to 26*26=676 unique categories
anonymise <- function(mycats) {
cnt <- length(unique(mycats))
mycats <- factor(mycats, labels=1:cnt)
if (cnt <= 26)
return(LETTERS[mycats])
newcats <- character(0)
for (i in 1:ceiling(cnt/26))
newcats <- c(newcats, paste0(LETTERS[i],LETTERS[1:26]))
return(newcats[mycats])
# create dummy data for testing
# library() stops with an error when a package is missing, whereas require()
# only warns and returns FALSE, so the script would fail later at the first
# call into the package.
library(caret)
library(dplyr)
# 500 rows: a random 0/1 target, a sequential ID, and three randomly drawn
# features (two-letter category, integer 1-100, ten-letter category).
# No seed is set here, so the data differ from run to run.
full <- data.frame(target = sample(c(0, 1), 500, replace = TRUE),
                   ID = 1:500,
                   v1 = sample(LETTERS[1:2], 500, replace = TRUE),
                   v2 = sample(1:100, 500, replace = TRUE),
                   v3 = sample(LETTERS[1:10], 500, replace = TRUE),
                   stringsAsFactors = FALSE)
# Partition the rows into 5 folds based on the target; the result is a list
# with one element per fold (per caret's documentation, returnTrain = FALSE
# gives the held-out row indices rather than the training ones).
folds <- createFolds(full$target, k = 5, list = TRUE, returnTrain = FALSE)
# Baseline loop: prints 0 through 101 with a short pause after each value.
# As the original note says, the output shows up only after the loop has
# finished (presumably on consoles that buffer output).
for (step in seq.int(0L, 101L)) {
  print(step)
  Sys.sleep(0.01)
}
# simplest way to print within loop
for (i in 0:101) {
print(i)
Sys.sleep(0.01)
# TIC TAC TOE
# Run function ttt(n) to begin
ttt <- function (n = 3) {
# set up board variables
board <- matrix(rep(".", (n+1)^2), ncol=n+1)
board[,1] <- c("/", 1:n)
board[1,] <- c("/", 1:n)
players <- c("X", "O")
require(ggplot2)
# prepare simpsons data ---------------------------------------------------
# code from http://suehpro.blogspot.com/2016/03/the-simpsons-as-chart.html
d1 <- data.frame(member=c(rep("Homer",3),
rep("Marge",3),
rep("Bart",3),
rep("Lisa",2),
rep("Maggie",2)),
shade=c("HomerPants","HomerShirt","Skin",