# Selva Prabhakaran (selva86)

Created Nov 24, 2015
Ozone Data treated for outliers and missing values
 # Code used in R Programming Course. # Import Data url <- "http://rstatistics.net/wp-content/uploads/2015/09/ozone.csv" inputData <- read.csv(url) # Replace outliers as missing values. replace_outlier_with_missing <- function(x, na.rm = TRUE, ...) { qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...) # get %iles H <- 1.5 * IQR(x, na.rm = na.rm) # outlier limit threshold y <- x
Created Nov 26, 2015
remove_heteroscedasticity_example.R
 .libPaths() url <- "http://rstatistics.net/wp-content/uploads/2015/09/ozone.csv" inputData <- read.csv(url) # Replace outliers as missing values. replace_outlier_with_missing <- function(x, na.rm = TRUE, ...) { qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...) # get %iles H <- 1.5 * IQR(x, na.rm = na.rm) # outlier limit threshold y <- x
Created Jul 20, 2016
area_plot_in_base_graphics.R
 # How to fill area under the line in base graphics.
 # This preview builds the sample time series used by the example:
 # a date column and a column of random values.
 library(xts)
 library(data.table)
 library(lubridate)

 # Fixed seed so the random values below are reproducible.
 set.seed(100)

 # 100 consecutive daily timestamps starting at 2016-01-01 (UTC).
 # `length.out` is spelled out in full instead of relying on partial
 # matching of `length`.
 date_seq <- seq.POSIXt(
   from = ymd("2016-01-01", tz = "UTC"),
   length.out = 100,
   by = "day"
 )

 # 100 uniform random values in [0, 1], rounded to two decimals.
 y <- round(runif(100), 2)

 # Combine into a data.table with columns `date` and `y`, then preview it.
 df <- data.table(date = date_seq, y)
 head(df)
Last active Nov 10, 2016
How to write multi-level ifelse() in R?
 # How to write multi-level ifelse() set.seed(100) abc <- sample(letters[1:5], 1000, replace = T) df <- data.frame(v1=abc, v2="blank", stringsAsFactors = F) head(df) system.time({ df\$v2 <- ifelse(df\$v1 == "a", "apple", ifelse(df\$v1 == "b", "ball", ifelse(df\$v1 == "c", "cat",
Created Mar 24, 2017
Solutions for Final Test of Learn R By Intensive Practice
 ## Solutions for Final Test of Learn R By Intensive Practice Q1. ```{r} #1 sqrt (729) #2 1203 %% 22 #3
Created Mar 25, 2017
Preparatory code for lasso regression lecture
 # Prep training and test datasets.
 # NOTE(review): `prostate`, `createDataPartition()` (presumably from caret)
 # and the `%ni%` operator are not defined in this snippet -- confirm they
 # are loaded/defined before this code runs.
 set.seed(100)
 trainRows <- createDataPartition(prostate$lpsa, p = .75, list = FALSE)
 trainData <- prostate[trainRows, ]
 testData <- prostate[-trainRows, ]

 # Prepare X and Y matrices separately.
 # Every column except "lpsa" and "train" goes into the X matrices;
 # "lpsa" is the Y column.
 train_x <- as.matrix(trainData[, colnames(trainData) %ni% c("lpsa", "train")])
 train_y <- as.matrix(trainData[, "lpsa"])

 # testData and trainData are both row subsets of `prostate`, so they share
 # the same column names; the trainData names are used to pick the columns.
 test_x <- as.matrix(testData[, colnames(trainData) %ni% c("lpsa", "train")])
Created Oct 5, 2017
Reproducible example for ks_plot
 # Reproducible example for ks_plot: load the data and fit the model.
 library(InformationValue)
 library(ggplot2)

 # 1. Import dataset
 # Both CSV files are downloaded over the network at run time.
 trainData <- read.csv('https://raw.githubusercontent.com/selva86/datasets/master/breastcancer_training.csv')
 testData <- read.csv('https://raw.githubusercontent.com/selva86/datasets/master/breastcancer_test.csv')

 # 2. Build Logistic Model
 # Binomial GLM of Class on three predictors, fitted on the training data.
 logitmod <- glm(
   Class ~ Cl.thickness + Cell.size + Cell.shape,
   family = "binomial",
   data = trainData
 )

 # 3. Predict on testData
Last active Oct 5, 2017
Function to reproduce the KS Chart in machinelearningplus.com/evaluation-metrics-classification-models
 library(InformationValue) library(ggplot2) ks_plot <- function (actuals, predictedScores) { rank <- 0:10 ks_table_out <- InformationValue:::ks_table(actuals = actuals, predictedScores = predictedScores) perc_positive <- c(0, ks_table_out\$cum_perc_responders) * 100 perc_negative <- c(0, ks_table_out\$cum_perc_non_responders) * 100 random_prediction <- seq(0, 100, 10) df <- data.frame(rank, random_prediction, perc_positive, perc_negative) df_stack <- stack(df, c(random_prediction, perc_positive, perc_negative))
Created Dec 14, 2019
 # Pre-create a 'pizza_tc_score' vector with missing values.
 # Fixed seed so the generated scores are reproducible.
 set.seed(100)

 # 1000 uniform random draws between 3 and 10, rounded to whole numbers.
 pizza_tc_score <- round(runif(1000, 3, 10))

 # Blank out five fixed positions to simulate missing values.
 pizza_tc_score[c(100, 204, 709, 816, 938)] <- NA
Created Dec 23, 2019
Mini Challenge for R Course
 # Mini Challenge Inputs
 # Two numeric vectors of seven values each.
 # NOTE(review): presumably paired element-wise (vans[i] goes with boxes[i])
 # -- confirm against the challenge statement.
 vans <- c(3, 4, 5, 2, 4, 4, 5)
 boxes <- c(30, 44, 50, 18, 36, 36, 40)
