@S0ngyuLi
Last active February 2, 2018 03:44
Naive Bayes Classifier in R
require(e1071)  # provides naiveBayes(); the classifier below is implemented by hand

# Load the data into a data frame.
# Note: use header = FALSE if pima.data has no header row.
pima_data <- read.csv(file = "pima.data", header = TRUE)

# Rename the columns "1".."9" so attributes can be looked up by index.
names(pima_data) <- c(1:9)
# 1. Number of times pregnant
# 2. Plasma glucose concentration at 2 hours in an oral glucose tolerance test
# 3. Diastolic blood pressure (mm Hg)
# 4. Triceps skin fold thickness (mm)
# 5. 2-Hour serum insulin (mu U/ml)
# 6. Body mass index (weight in kg/(height in m)^2)
# 7. Diabetes pedigree function
# 8. Age (years)
# 9. Class variable (0 or 1)
# Randomly split the rows into an 80/20 train/test partition.
# (Call set.seed() first if a reproducible split is needed.)
training_portion <- 0.8
h <- c(train = training_portion, test = 1 - training_portion)
g <- sample(cut(seq(nrow(pima_data)), nrow(pima_data) * cumsum(c(0, h)), labels = names(h)))
epoch_data <- split(pima_data, g)
train_data <- epoch_data$train
test_data <- epoch_data$test
# Split the training rows by class label (column "9").
train_class_0 <- train_data[train_data$'9' == 0, ]
train_class_1 <- train_data[train_data$'9' == 1, ]

# Per-attribute Gaussian parameters (variance and mean) for each class.
train_class_0_var_by_par  <- lapply(train_class_0[, c(1:8)], var)
train_class_1_var_by_par  <- lapply(train_class_1[, c(1:8)], var)
train_class_0_mean_by_par <- lapply(train_class_0[, c(1:8)], mean)
train_class_1_mean_by_par <- lapply(train_class_1[, c(1:8)], mean)

# Class priors estimated from the training data.
p_label_0 <- nrow(train_class_0) / nrow(train_data)
p_label_1 <- 1.0 - p_label_0
# Gaussian likelihood P(attribute = val | class), using the per-class mean
# and variance estimated above. Returns -1 for an invalid attribute index or
# class label. Equivalent to dnorm(val, mean_res, sqrt(var_res)).
p_condition_class <- function(attr, val, class) {
  if (attr < 1 || attr > 8) {
    return(-1)
  }
  if (class == 1) {
    var_res  <- train_class_1_var_by_par[[toString(attr)]]
    mean_res <- train_class_1_mean_by_par[[toString(attr)]]
  } else if (class == 0) {
    var_res  <- train_class_0_var_by_par[[toString(attr)]]
    mean_res <- train_class_0_mean_by_par[[toString(attr)]]
  } else {
    return(-1)
  }
  ret <- (1.0 / sqrt(2.0 * pi * var_res)) * exp(-(val - mean_res)^2 / (2 * var_res))
  return(ret)
}
# Classify a single row: compute the (unnormalized) posterior for each class
# as prior * product of per-attribute Gaussian likelihoods, and return the
# label with the larger score.
naive_bayes_formula <- function(row) {
  condition_prob_0 <- 1.0
  for (i in c(1:8)) {
    condition_prob_0 <- condition_prob_0 * p_condition_class(i, row[[toString(i)]], 0)
  }
  condition_prob_0 <- condition_prob_0 * p_label_0

  condition_prob_1 <- 1.0
  for (i in c(1:8)) {
    condition_prob_1 <- condition_prob_1 * p_condition_class(i, row[[toString(i)]], 1)
  }
  condition_prob_1 <- condition_prob_1 * p_label_1

  if (condition_prob_0 < condition_prob_1) {
    return(1)
  } else {
    return(0)
  }
}
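
# Optional variant (not part of the original gist): multiplying eight small
# densities can underflow for extreme rows, so an equivalent formulation sums
# log-likelihoods instead. A minimal sketch, assuming the same
# p_condition_class() and priors defined above:
naive_bayes_log <- function(row) {
  log_prob_0 <- log(p_label_0)
  log_prob_1 <- log(p_label_1)
  for (i in c(1:8)) {
    log_prob_0 <- log_prob_0 + log(p_condition_class(i, row[[toString(i)]], 0))
    log_prob_1 <- log_prob_1 + log(p_condition_class(i, row[[toString(i)]], 1))
  }
  if (log_prob_0 < log_prob_1) {
    return(1)
  } else {
    return(0)
  }
}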
# Evaluate on the held-out test set: count correct predictions, then divide
# by the number of test rows to get accuracy.
accuracy <- 0.0
for (i in c(1:nrow(test_data))) {
  estimated_label <- naive_bayes_formula(test_data[i, ])
  if (estimated_label == test_data[i, '9']) {
    accuracy <- accuracy + 1.0
  }
}
print(accuracy / nrow(test_data))
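
# Cross-check (not in the original gist): since e1071 is loaded at the top,
# its naiveBayes() can be fit on the same split as a sanity check on the
# hand-rolled classifier. A minimal sketch, assuming the train/test frames
# defined above; the label column "9" is passed as a factor so e1071 treats
# the task as classification.
nb_model <- naiveBayes(train_data[, c(1:8)], as.factor(train_data$'9'))
nb_pred  <- predict(nb_model, test_data[, c(1:8)])
print(mean(as.character(nb_pred) == as.character(test_data$'9')))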