Skip to content

Instantly share code, notes, and snippets.

@selva86
selva86 / prep_for_significant_variables.R
Created November 24, 2015 13:04
Ozone Data treated for outliers and missing values
# Code used in R Programming Course.
# Import Data: fetch the ozone CSV over HTTP and parse it with read.csv().
# NOTE(review): needs network access to rstatistics.net - confirm the URL is
# still reachable before relying on this script.
url <- "http://rstatistics.net/wp-content/uploads/2015/09/ozone.csv"
inputData <- read.csv(url)
# Replace outliers as missing values.
replace_outlier_with_missing <- function(x, na.rm = TRUE, ...) {
qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...) # get %iles
H <- 1.5 * IQR(x, na.rm = na.rm) # outlier limit threshold
y <- x
@selva86
selva86 / residual_analysis_heteroscedasticity.R
Created November 26, 2015 07:13
remove_heteroscedasticity_example.R
# Query the library search paths of the current R session.
.libPaths()
# Fetch the ozone CSV over HTTP and parse it with read.csv().
# NOTE(review): needs network access to rstatistics.net - confirm the URL is
# still reachable before relying on this script.
url <- "http://rstatistics.net/wp-content/uploads/2015/09/ozone.csv"
inputData <- read.csv(url)
# Replace outliers as missing values.
replace_outlier_with_missing <- function(x, na.rm = TRUE, ...) {
qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...) # get %iles
H <- 1.5 * IQR(x, na.rm = na.rm) # outlier limit threshold
y <- x
@selva86
selva86 / area_plot_in_base_graphics.R
Created July 20, 2016 15:13
area_plot_in_base_graphics.R
# How to fill area under the line in base graphics
library(xts)
library(data.table)
library(lubridate)
# Build a reproducible toy daily series: 100 timestamps one day apart,
# starting 2016-01-01 (UTC), each paired with a uniform random value
# rounded to two decimals.
set.seed(100)
date_seq <- seq.POSIXt(
  from = ymd("2016-01-01", tz = "UTC"),
  length.out = 100,  # spelled out in full; `length` relied on partial matching
  by = "day"
)
y <- round(runif(100), 2)
df <- data.table(date = date_seq, y)
head(df)
@selva86
selva86 / multilevel_ifelse.R
Last active November 10, 2016 10:47
How to write multi-level ifelse() in R?
# How to write multi-level ifelse()
# Build a reproducible demo data frame: v1 holds 1000 letters drawn with
# replacement from "a".."e"; v2 is a placeholder column to be overwritten.
set.seed(100)
# Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
abc <- sample(letters[1:5], 1000, replace = TRUE)
df <- data.frame(v1 = abc, v2 = "blank", stringsAsFactors = FALSE)
head(df)
system.time({
df$v2 <- ifelse(df$v1 == "a", "apple",
ifelse(df$v1 == "b", "ball",
ifelse(df$v1 == "c", "cat",
@selva86
selva86 / final_test.R
Created March 24, 2017 09:30
Solutions for Final Test of Learn R By Intensive Practice
## Solutions for Final Test of Learn R By Intensive Practice
Q1.
```{r}
#1
sqrt (729)
#2
1203 %% 22
#3
@selva86
selva86 / lasso_dataprep.R
Created March 25, 2017 10:27
Preparatory code for lasso regression lecture
# Prep training and test datasets: createDataPartition() picks the training
# rows (p = .75) based on lpsa; the remaining rows form the test set.
# NOTE(review): `prostate`, createDataPartition() and `%ni%` are not defined
# in this snippet - presumably loaded earlier in the lecture; confirm.
set.seed(100)
trainRows <- createDataPartition(prostate$lpsa, p = .75, list = FALSE)
trainData <- prostate[trainRows, ]
testData <- prostate[-trainRows, ]
# Prepare X and Y matrices separately: X drops the "lpsa" and "train"
# columns, Y is the "lpsa" column on its own.
train_x <- as.matrix(trainData[, colnames(trainData) %ni% c("lpsa", "train")])
train_y <- as.matrix(trainData[, "lpsa"])
# Select test columns by testData's own column names, so the test matrix does
# not silently depend on the layout of trainData.
test_x <- as.matrix(testData[, colnames(testData) %ni% c("lpsa", "train")])
@selva86
selva86 / ks_plot_example.R
Created October 5, 2017 07:11
Reproducible example for ks_plot
library(InformationValue)
library(ggplot2)
# Step 1: download the breast-cancer training and test CSVs from GitHub.
trainData <- read.csv(
  "https://raw.githubusercontent.com/selva86/datasets/master/breastcancer_training.csv"
)
testData <- read.csv(
  "https://raw.githubusercontent.com/selva86/datasets/master/breastcancer_test.csv"
)
# Step 2: fit a binomial GLM of Class on three predictors from trainData.
logitmod <- glm(
  Class ~ Cl.thickness + Cell.size + Cell.shape,
  family = "binomial",
  data = trainData
)
# 3. Predict on testData
@selva86
selva86 / kolmogorov_smirnov_chart.R
Last active October 5, 2017 07:18
Function to reproduce the KS Chart in machinelearningplus.com/evaluation-metrics-classification-models
library(InformationValue)
library(ggplot2)
ks_plot <- function (actuals, predictedScores) {
rank <- 0:10
ks_table_out <- InformationValue:::ks_table(actuals = actuals, predictedScores = predictedScores)
perc_positive <- c(0, ks_table_out$cum_perc_responders) * 100
perc_negative <- c(0, ks_table_out$cum_perc_non_responders) * 100
random_prediction <- seq(0, 100, 10)
df <- data.frame(rank, random_prediction, perc_positive, perc_negative)
df_stack <- stack(df, c(random_prediction, perc_positive, perc_negative))
# Simulate 1000 'pizza_tc_score' values (uniform draws on [3, 10], rounded to
# whole numbers), then mark five fixed positions as missing.
set.seed(100)
pizza_tc_score <- round(runif(1000, min = 3, max = 10))
is.na(pizza_tc_score) <- c(100, 204, 709, 816, 938)
@selva86
selva86 / 3_12.R
Created December 23, 2019 14:39
Mini Challenge for R Course
# Inputs for the mini challenge: two numeric vectors of seven values each.
vans <- c(3, 4, 5, 2, 4, 4, 5)
boxes <- c(30, 44, 50, 18, 36, 36, 40)