@affo
Created July 26, 2016 17:11
Naive Bayes dish classifier
import numpy as np
# OK, the point now is to create a naive
# Bayes model and stop there, without accounting
# for the lab...
# We want to model, for example, the food.
# The food can be terrific, bad, not bad, good
# or awesome:
classes_labels = {
    0: 'terrific',
    1: 'bad',
    2: 'not_bad',
    3: 'good',
    4: 'awesome'
}
# Over the last few days we tasted a lot of different dishes,
# and we were able to classify them.
# We could distinguish different ingredients:
features = np.array(['tomato', 'salad', 'coriander', 'rice', 'potatoes',
                     'beans', 'pork', 'beef', 'bacalao', 'salmon', 'clams',
                     'sugar'])
# And this was our experience:
def dish_repr(dish):
    # Map a binary ingredient vector to the names of its active ingredients.
    indices = np.nonzero(dish)
    return features[indices]
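# For example (a hypothetical illustration, with the ingredient order
# given by `features` above), a dish containing only tomato and rice:
#   dish_repr(np.array([1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]))
#   -> ['tomato' 'rice']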
def gen_dish():
    # Ingredient combinations that push a dish towards the better classes...
    likely = [
        ('tomato', 'salad'),
        ('tomato', 'salad', 'rice'),
        ('pork', 'clams', 'potatoes'),
        ('bacalao', 'coriander'),
        ('salmon', 'coriander'),
        ('beef', 'coriander'),
        ('pork', 'coriander'),
        ('pork', 'potatoes'),
        ('beef', 'potatoes'),
    ]
    # ...and combinations that push it towards the worse ones.
    unlikely = [
        ('bacalao', 'sugar'),
        ('beef', 'sugar'),
        ('salmon', 'sugar'),
        ('clams', 'sugar'),
        ('pork', 'sugar'),
        ('pork', 'beef'),
        ('pork', 'bacalao'),
        ('beef', 'bacalao'),
    ]

    def get_ingredients():
        # Each ingredient is independently present (1) or absent (0).
        return np.random.randint(0, 2, len(features))

    def get_class(dish):
        dish_r = dish_repr(dish)

        def sublist_exists(sub, l):
            for el in sub:
                if el not in l:
                    return False
            return True

        no_likely = [sublist_exists(l, dish_r) for l in likely].count(True)
        no_unlikely = [sublist_exists(u, dish_r) for u in unlikely].count(True)
        no_cl = len(classes_labels)
        lik_contrib = len(likely) / float(no_cl)
        unlik_contrib = len(unlikely) / float(no_cl)
        base_score = no_cl // 2
        score = base_score + int(lik_contrib * no_likely - unlik_contrib * no_unlikely)
        # clamp the score to a valid class index (0 .. no_cl - 1)
        if score < 0:
            score = 0
        if score > no_cl - 1:
            score = no_cl - 1
        return score

    dish = get_ingredients()
    return dish, get_class(dish)
NO_DISHES = 1500
x = np.zeros((NO_DISHES, len(features)), dtype=int)
y = np.zeros((NO_DISHES,), dtype=int)
for i in range(NO_DISHES):
    x[i, :], y[i] = gen_dish()
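# A quick look at the generated data (a small sketch, not in the original
# gist): how many dishes ended up in each class.
print('dishes per class:', np.bincount(y, minlength=len(classes_labels)))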
# Now we can get a classifier.
# Let's calculate priors:
no_classes = len(classes_labels)
priors = np.zeros((no_classes,), dtype=np.float64)
for i in range(no_classes):
    no_dishes = np.where(y == i)[0].shape[0]
    # P(class) = dishes observed in that class / total number of dishes
    priors[i] = no_dishes / float(NO_DISHES)
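# Sanity check (not in the original gist): the priors should now form a
# probability distribution over the classes.
assert np.isclose(priors.sum(), 1.0)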
# And conditionals:
conds = np.zeros((len(features), no_classes), dtype=np.float64)
# for each class
for i in range(no_classes):
    # extract the dishes matching that class
    indices = np.where(y == i)[0]
    if indices.shape[0] == 0:
        continue
    dishes_in_class = x[indices, :]
    # count the number of dishes holding
    # a precise feature (active feature)
    no_dishes_in_class_per_f = dishes_in_class.sum(0)
    total_dishes_in_class = no_dishes_in_class_per_f.sum()
    # P(feature | class): share of the active ingredients in this class
    # that belong to each feature
    conds[:, i] = no_dishes_in_class_per_f / float(total_dishes_in_class)
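# A possible refinement (not in the original gist): Laplace smoothing avoids
# zero conditionals for ingredients never seen in a class, which would
# otherwise zero out the whole product inside classify(). A minimal sketch,
# with a hypothetical smoothing constant alpha:
#
#   alpha = 1.0
#   conds[:, i] = (no_dishes_in_class_per_f + alpha) / \
#                 float(total_dishes_in_class + alpha * len(features))
#
# The classifier below keeps the unsmoothed estimates, as in the original.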
# Now we can write the classifier:
def classify(dish):
    # Naive Bayes decision rule: P(class | dish) is proportional to
    # P(class) * prod(P(feature | class)) over the dish's active features.
    res = priors.copy()
    indices = np.nonzero(dish)[0]
    for i in range(no_classes):
        probs = dish[indices] * conds[indices, i]
        res[i] *= probs.prod()
    print(res)
    i = np.argmax(res)
    return classes_labels[i]
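# With more features, the product of small probabilities can underflow.
# A common alternative (a sketch, not part of the original gist) is to
# compare log-probabilities instead; classify_log is a hypothetical helper
# that agrees with classify() whenever the involved estimates are non-zero.
def classify_log(dish):
    indices = np.nonzero(dish)[0]
    # log P(class) + sum of log P(feature | class) over the active features;
    # log(0) becomes -inf, which argmax handles fine.
    with np.errstate(divide='ignore'):
        scores = np.log(priors) + np.log(conds[indices, :]).sum(axis=0)
    return classes_labels[int(np.argmax(scores))]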
if __name__ == '__main__':
    # Press Enter to generate and classify a new random dish.
    while True:
        input()
        dish, _ = gen_dish()
        print(dish_repr(dish))
        print(classify(dish))
        print()