@affo
Created July 26, 2016 17:11
Naive Bayes dish classifier
import numpy as np
# OK, the point now is to create a naive
# Bayes model and stop there, without accounting
# for the lab...
# We want to model, for example, the food.
# The food can be terrific, bad, not bad, good
# or awesome:
classes_labels = {
    0: 'terrific',
    1: 'bad',
    2: 'not_bad',
    3: 'good',
    4: 'awesome'
}
# Over the last few days we tasted a lot of different dishes,
# and we were able to classify them.
# We could distinguish different ingredients:
features = np.array(['tomato', 'salad', 'coriander', 'rice', 'potatoes',
                     'beans', 'pork', 'beef', 'bacalao', 'salmon', 'clams',
                     'sugar'])
# And this was our experience:
def dish_repr(dish):
    # Map a binary ingredient vector to the names of its active ingredients.
    indices = np.nonzero(dish)
    return features[indices]
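# For example (a hypothetical illustration, with the ingredient order
# given by `features` above), a dish containing only tomato and rice:
#   dish_repr(np.array([1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]))
#   -> ['tomato' 'rice']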
def gen_dish():
    # Ingredient combinations that push a dish towards the better classes...
    likely = [
        ('tomato', 'salad'),
        ('tomato', 'salad', 'rice'),
        ('pork', 'clams', 'potatoes'),
        ('bacalao', 'coriander'),
        ('salmon', 'coriander'),
        ('beef', 'coriander'),
        ('pork', 'coriander'),
        ('pork', 'potatoes'),
        ('beef', 'potatoes'),
    ]
    # ...and combinations that push it towards the worse ones.
    unlikely = [
        ('bacalao', 'sugar'),
        ('beef', 'sugar'),
        ('salmon', 'sugar'),
        ('clams', 'sugar'),
        ('pork', 'sugar'),
        ('pork', 'beef'),
        ('pork', 'bacalao'),
        ('beef', 'bacalao'),
    ]

    def get_ingredients():
        # Each ingredient is independently present (1) or absent (0).
        return np.random.randint(0, 2, len(features))

    def get_class(dish):
        dish_r = dish_repr(dish)

        def sublist_exists(sub, l):
            for el in sub:
                if el not in l:
                    return False
            return True

        no_likely = [sublist_exists(l, dish_r) for l in likely].count(True)
        no_unlikely = [sublist_exists(u, dish_r) for u in unlikely].count(True)
        no_cl = len(classes_labels)
        lik_contrib = len(likely) / float(no_cl)
        unlik_contrib = len(unlikely) / float(no_cl)
        base_score = no_cl // 2
        score = base_score + int(lik_contrib * no_likely - unlik_contrib * no_unlikely)
        # clamp the score to a valid class index (0 .. no_cl - 1)
        if score < 0:
            score = 0
        if score > no_cl - 1:
            score = no_cl - 1
        return score

    dish = get_ingredients()
    return dish, get_class(dish)
NO_DISHES = 1500
x = np.zeros((NO_DISHES, len(features)), dtype=int)
y = np.zeros((NO_DISHES,), dtype=int)
for i in range(NO_DISHES):
    x[i, :], y[i] = gen_dish()
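# A quick look at the generated data (a small sketch, not in the original
# gist): how many dishes ended up in each class.
print('dishes per class:', np.bincount(y, minlength=len(classes_labels)))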
# Now we can get a classifier.
# Let's calculate priors:
no_classes = len(classes_labels)
priors = np.zeros((no_classes,), dtype=np.float64)
for i in range(no_classes):
    no_dishes = np.where(y == i)[0].shape[0]
    # P(class) = dishes observed in that class / total number of dishes
    priors[i] = no_dishes / float(NO_DISHES)
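# Sanity check (not in the original gist): the priors should now form a
# probability distribution over the classes.
assert np.isclose(priors.sum(), 1.0)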
# And conditionals:
conds = np.zeros((len(features), no_classes), dtype=np.float64)
# for each class
for i in range(no_classes):
    # extract the dishes matching that class
    indices = np.where(y == i)[0]
    if indices.shape[0] == 0:
        continue
    dishes_in_class = x[indices, :]
    # count the number of dishes holding
    # a precise feature (active feature)
    no_dishes_in_class_per_f = dishes_in_class.sum(0)
    total_dishes_in_class = no_dishes_in_class_per_f.sum()
    # P(feature | class): share of the active ingredients in this class
    # that belong to each feature
    conds[:, i] = no_dishes_in_class_per_f / float(total_dishes_in_class)
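# A possible refinement (not in the original gist): Laplace smoothing avoids
# zero conditionals for ingredients never seen in a class, which would
# otherwise zero out the whole product inside classify(). A minimal sketch,
# with a hypothetical smoothing constant alpha:
#
#   alpha = 1.0
#   conds[:, i] = (no_dishes_in_class_per_f + alpha) / \
#                 float(total_dishes_in_class + alpha * len(features))
#
# The classifier below keeps the unsmoothed estimates, as in the original.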
# Now we can write the classifier:
def classify(dish):
    # Naive Bayes decision rule: P(class | dish) is proportional to
    # P(class) * prod(P(feature | class)) over the dish's active features.
    res = priors.copy()
    indices = np.nonzero(dish)[0]
    for i in range(no_classes):
        probs = dish[indices] * conds[indices, i]
        res[i] *= probs.prod()
    print(res)
    i = np.argmax(res)
    return classes_labels[i]
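# With more features, the product of small probabilities can underflow.
# A common alternative (a sketch, not part of the original gist) is to
# compare log-probabilities instead; classify_log is a hypothetical helper
# that agrees with classify() whenever the involved estimates are non-zero.
def classify_log(dish):
    indices = np.nonzero(dish)[0]
    # log P(class) + sum of log P(feature | class) over the active features;
    # log(0) becomes -inf, which argmax handles fine.
    with np.errstate(divide='ignore'):
        scores = np.log(priors) + np.log(conds[indices, :]).sum(axis=0)
    return classes_labels[int(np.argmax(scores))]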
if __name__ == '__main__':
    # Press Enter to generate and classify a new random dish.
    while True:
        input()
        dish, _ = gen_dish()
        print(dish_repr(dish))
        print(classify(dish))
        print()