Standard metrics for both classification and regression problems in supervised learning
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Metrics for measuring the performance of a model in a given dataset,
in both kinds of problems: *classification* and *regression*.
Implementation extracted from: https://github.com/leferrad/learninspy/blob/master/learninspy/utils/evaluation.py
NOTE: This code only works with Python 2.7.x (it relies on tuple parameter unpacking in lambdas, which was removed in Python 3).
"""
__author__ = 'leferrad'
import numpy as np
class ClassificationMetrics(object):
"""
Metrics to evaluate the performance of a model in terms of classification.
Based on the list of metrics presented in the publication by Sokolova et al. [sokolova2009systematic]_.
:param predicted_actual: list of tuples (predicted, actual)
:param n_classes: int, number of classes handled in the classification task
>>> predict = [0, 1, 0, 2, 2, 1]
>>> labels = [0, 1, 1, 2, 1, 0]
>>> metrics = ClassificationMetrics(zip(predict, labels), 3)
>>> metrics.measures.keys()
['Recall', 'F-measure', 'Precision', 'Accuracy']
>>> metrics.accuracy()
0.5
>>> metrics.f_measure()
0.5499999999999999
>>> metrics.precision()
0.5
>>> metrics.evaluate('Recall')
0.611111111111111
>>> metrics.confusion_matrix()
array([[1, 1, 0],
       [1, 1, 1],
       [0, 0, 1]])
**References**:
.. [sokolova2009systematic] Sokolova, M., & Lapalme, G. (2009).
A systematic analysis of performance measures for classification tasks.
Information Processing & Management, 45(4), 427-437.
"""
# See http://machine-learning.tumblr.com/post/1209400132/mathematical-definitions-for-precisionrecall-for
# See http://rali.iro.umontreal.ca/rali/sites/default/files/publis/SokolovaLapalme-JIPM09.pdf
def __init__(self, predicted_actual, n_classes):
self.predicted_actual = predicted_actual
self.tp = []
self.fp = []
self.fn = []
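# Count true positives, false positives and false negatives per class (one-vs-rest counting).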
for c in xrange(n_classes):
self.tp.append(sum(map(lambda (p, a): p == c and a == c, predicted_actual)))
self.fp.append(sum(map(lambda (p, a): p == c and a != c, predicted_actual)))
self.fn.append(sum(map(lambda (p, a): p != c and a == c, predicted_actual)))
self.n_classes = n_classes
self.n_elem = len(predicted_actual)
self.measures = {'F-measure': self.f_measure, 'Accuracy': self.accuracy,
'Precision': self.precision, 'Recall': self.recall}
def accuracy(self, label=None):
"""
Calculates the accuracy of the classification, as the rate of hits over the total number of samples.
Given the *N* pairs of (predicted, actual) values, the formula for this value is:
:math:`ACC=\\dfrac{1}{N}\displaystyle\sum\limits_{i=1}^{N} [p_i = a_i]`
When a *label* is given, only the samples whose actual class is *label* are considered.
:param label: int in {0, C - 1} to indicate which class to evaluate. If *None* then it evaluates over all classes.
:return: float, varying from 0 (worst) to 1 (best).
"""
if label is None:
acc = sum(map(lambda (pre, act): pre == act, self.predicted_actual)) / float(self.n_elem)
else:
acc = sum(map(lambda (pre, act): pre == act == label, self.predicted_actual)) / \
float(sum([act == label for _, act in self.predicted_actual]))
return acc
def precision(self, label=None, macro=True):
"""
Calculates the precision of the classification, as the number of **true positives**
(i.e. items correctly assigned to the given class) divided by the total number of elements
classified as that class (i.e. the sum of true positives and **false positives**, which are the
items incorrectly classified as the given class). This is expressed in the following equation:
:math:`P_i=\\dfrac{TP_i}{TP_i+FP_i}`
Given the number of classes 'C', the equations for micro- and macro-averaging are:
:math:`P_{\\mu}=\\dfrac{\sum_{i=0}^{C-1} TP_i}{\sum_{i=0}^{C-1} (TP_i+FP_i)}, \quad
P_{M}=\\dfrac{1}{C}\displaystyle\sum\limits_{i=0}^{C-1} \\frac{TP_i}{TP_i+FP_i}`
:param label: int in {0, C - 1} to indicate which class to evaluate. If *None* then it evaluates over all classes.
:param macro: bool that indicates how to compute this value over all the classes
(True for *macro* averaging and False for *micro* averaging).
:return: float, varying from 0 (worst) to 1 (best).
"""
if label is None:
if macro is True:
p = sum([self.precision(c) for c in xrange(self.n_classes)])
p /= float(self.n_classes)
else:
p = sum(self.tp) / float(sum(map(lambda (tp, fp): tp + fp, zip(self.tp, self.fp))))
else:
if self.tp[label] == 0.0 and self.fp[label] == 0.0:
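# Convention adopted here: precision is 1.0 when the class was never predicted (no TP and no FP).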
p = 1.0
else:
p = self.tp[label] / float(self.tp[label] + self.fp[label])
return p
def recall(self, label=None, macro=True):
"""
Calculates the recall of the classification, as the number of **true positives**
(i.e. items correctly assigned to the given class) divided by the total number of elements that
actually belong to that class (i.e. the sum of true positives and **false negatives**, which are the
items of that class incorrectly assigned to other classes). This is expressed in the following equation:
:math:`R_i=\\dfrac{TP_i}{TP_i+FN_i}`
Given the number of classes 'C', the equations for micro- and macro-averaging are:
:math:`R_{\\mu}=\\dfrac{\sum_{i=0}^{C-1} TP_i}{\sum_{i=0}^{C-1} (TP_i+FN_i)}, \quad
R_{M}=\\dfrac{1}{C}\displaystyle\sum\limits_{i=0}^{C-1} \\frac{TP_i}{TP_i+FN_i}`
:param label: int in {0, C - 1} to indicate which class to evaluate. If *None* then it evaluates over all classes.
:param macro: bool that indicates how to compute this value over all the classes
(True for *macro* averaging and False for *micro* averaging).
:return: float, varying from 0 (worst) to 1 (best).
"""
if label is None:
if macro is True:
r = sum([self.recall(c) for c in xrange(self.n_classes)])
r /= float(self.n_classes)
else:
r = sum(self.tp) / float(sum(map(lambda (tp, fn): tp + fn, zip(self.tp, self.fn))))
else:
if self.tp[label] == 0.0 and self.fn[label] == 0.0:
r = 1.0
else:
r = self.tp[label] / float(self.tp[label] + self.fn[label])
return r
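# Illustration with the class doctest data: micro-averaged recall is sum(TP)/sum(TP+FN) = 3/6 = 0.5,
# while macro-averaged recall is (1/2 + 1/3 + 1)/3 ~= 0.6111, since macro gives each class equal weight.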
def f_measure(self, beta=1, label=None, macro=True):
"""
Calculates the *F-measure* of the classification, which combines the *precision* and *recall* measures
through a harmonic mean of both. The balance between them is adjusted by a parameter :math:`\\beta`, and a widely
used case of this measure is the *F1-score*, where both measures are weighted equally with :math:`\\beta = 1`.
:math:`F(\\beta)=(1+\\beta^2)(\\dfrac{PR}{\\beta^2 P + R}), \quad F_1=\\dfrac{2PR}{P + R}`
Given the number of classes 'C', the equations for micro- and macro-averaging are:
:math:`F_{\\mu}(\\beta)=(1+\\beta^2)(\\dfrac{P_{\\mu}R_{\\mu}}{\\beta^2 P_{\\mu} + R_{\\mu}}), \quad
F_{M}(\\beta)=(1+\\beta^2)(\\dfrac{P_{M}R_{M}}{\\beta^2 P_{M} + R_{M}})`
:param beta: float, parameter :math:`\\beta` which controls the balance between *precision* and *recall*.
If :math:`\\beta < 1` then *precision* is weighted more heavily, and with :math:`\\beta > 1` *recall* is favored.
:param label: int in {0, C - 1} to indicate which class to evaluate. If *None* then it evaluates over all classes.
:param macro: bool that indicates how to compute this value over all the classes
(True for *macro* averaging and False for *micro* averaging).
:return: float, varying from 0 (worst) to 1 (best).
"""
ppv = self.precision(label, macro)
tpr = self.recall(label, macro)
if ppv == 0 and tpr == 0:
f_score = 0.0
else:
f_score = (1 + beta*beta)*(ppv * tpr) / (beta*beta*ppv + tpr)
return f_score
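# Worked example with the class doctest data: macro P = 0.5 and macro R ~= 0.6111, so
# F1 = 2*P*R/(P + R) = 0.6111/1.1111 ~= 0.55, which matches the doctest value.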
def confusion_matrix(self):
"""
Resulting confusion matrix, where each row corresponds to an *actual* class and each column to a
*predicted* class, both ordered by class index in ascending order.
:return: numpy.ndarray
"""
conf_mat = []
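# Row r gathers the samples whose actual class is r; column c counts how many of them were predicted as class c.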
for r in xrange(self.n_classes):
pre_act = filter(lambda (p, a): a == r, self.predicted_actual)
for c in xrange(self.n_classes):
conf_mat.append(sum(map(lambda (p, a): p == c, pre_act)))
return np.array(conf_mat).reshape((self.n_classes, self.n_classes))
def evaluate(self, measure='F-measure', **kwargs):
"""
Applies one of the implemented metrics, which are registered in the dict *self.measures*.
This function is useful as a wrapper to easily evaluate any desired metric.
:param measure: string, key of an implemented metric.
:param kwargs: dict, it can include some other parameters for the metric to be used
(e.g. *beta* for *F-measure*, or *micro / macro* for those ones that support it).
:return: float
"""
assert measure in self.measures, ("Measure %s doesn't belong to the supported ones: %s"
                                  % (measure, self.measures.keys()))
return self.measures[measure](**kwargs)
class RegressionMetrics(object):
"""
Metrics to evaluate the performance of a model in terms of regression.
:param predicted_actual: list of tuples (predicted, actual)
>>> predict = [0.5, 1.1, 1.5, 2.0, 3.5, 5.2]
>>> labels = [0.5, 1.0, 2.0, 3.0, 4.0, 5.0]
>>> metrics = RegressionMetrics(zip(predict, labels))
>>> metrics.measures.keys()
['ExplVar', 'MSE', 'MAE', 'R2', 'RMSE']
>>> metrics.mae()  # doctest: +ELLIPSIS
0.3833333333333...
>>> metrics.mse()
0.25833333333333336
>>> metrics.evaluate('RMSE')
0.50826502273256358
>>> metrics.r2()
0.8980821917808219
>>> metrics.explained_variance()
0.9297534246575342
"""
def __init__(self, predicted_actual):
self.predicted_actual = predicted_actual
self.n_elem = len(predicted_actual)
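# Signed error per sample, defined as actual minus predicted.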
self.error = map(lambda (p, a): a - p, self.predicted_actual)
self.measures = {'MSE': self.mse, 'RMSE': self.rmse, 'MAE': self.mae,
'R2': self.r2, 'ExplVar': self.explained_variance}
def mse(self):
"""
Calculates the *Mean Squared Error* (MSE), defined as the average of the squared differences
between the actual value and the predicted one over the *N* samples:
:math:`MSE=\\dfrac{1}{N}\displaystyle\sum\limits_{i}^N (p_i - a_i)^2`
:return: float, varying from 0 (best) to inf (worst).
"""
return np.mean(np.square(self.error))
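# Illustration with the class doctest data: the squared errors are [0.0, 0.01, 0.25, 1.0, 0.25, 0.04],
# whose mean is ~0.2583, matching the doctest value.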
def rmse(self):
"""
Returns the square root of the MSE, which expresses the error in the same units as the target
variable and thus makes it easier to compare performance among several models:
:math:`RMSE=\\sqrt{MSE}`
:return: float, varying from 0 (best) to inf (worst).
"""
return np.sqrt(self.mse())
def mae(self):
"""
Calculates the *Mean Absolute Error* (MAE), defined as the average of the absolute differences between
the actual value and the predicted one over the *N* samples:
:math:`MAE=\\dfrac{1}{N}\displaystyle\sum\limits_{i}^N |p_i - a_i|`
:return: float, varying from 0 (best) to inf (worst).
"""
return np.mean(np.abs(self.error))
def rmae(self):
"""
Returns the square root of the MAE, analogous to how the RMSE relates to the MSE, which can be
useful when comparing performance among several models:
:math:`RMAE=\\sqrt{MAE}`
:return: float, varying from 0 (best) to inf (worst).
"""
return np.sqrt(self.mae())
def r2(self):
"""
Calculates the coefficient of determination or R^2, which indicates the proportion of the
variance of the *actual* values that is explained by the *predicted* values.
See more info in Wikipedia: `Coefficient of determination
<https://en.wikipedia.org/wiki/Coefficient_of_determination>`_.
:return: float, varying from 0 (worst) to 1 (best).
"""
mean_actual = np.mean(map(lambda (p, a): a, self.predicted_actual))
ssres = np.sum(np.square(self.error))
sstot = np.sum(np.square(map(lambda (p, a): a - mean_actual, self.predicted_actual)))
return 1 - float(ssres / sstot)
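# Note: r2() coincides with explained_variance() only when the errors have zero mean; with biased
# predictions the two differ, as in the class doctest (~0.898 vs ~0.930).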
def explained_variance(self):
"""
Calculates the proportion of the variance of the *actual* values that is explained by the prediction, such that:
:math:`ExpVar=1-\\dfrac{Var(actual - predicted)}{Var(actual)}`
:return: float, varying from 0 (worst) to 1 (best).
"""
var_error = np.var(self.error)
var_actual = np.var(map(lambda (p, a): a, self.predicted_actual))
return 1 - float(var_error / var_actual)
def evaluate(self, measure='R2'):
"""
Applies one of the implemented metrics, which are registered in the dict *self.measures*.
This function is useful as a wrapper to easily evaluate any desired metric.
:param measure: string, key of an implemented metric.
:return: float
"""
assert measure in self.measures, ("Measure %s doesn't belong to the supported ones: %s"
                                  % (measure, self.measures.keys()))
return self.measures[measure]()
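# --- Usage sketch ---
# A minimal, hypothetical demo (not part of the original gist) showing how both metric classes
# could be exercised together under Python 2.7, reusing the values from the doctests above.
if __name__ == '__main__':
    # Classification example
    predict_cls = [0, 1, 0, 2, 2, 1]
    labels_cls = [0, 1, 1, 2, 1, 0]
    clf_metrics = ClassificationMetrics(zip(predict_cls, labels_cls), n_classes=3)
    print("Accuracy: %.4f" % clf_metrics.accuracy())
    print("Macro F1: %.4f" % clf_metrics.f_measure(beta=1))
    print("Confusion matrix:")
    print(clf_metrics.confusion_matrix())

    # Regression example
    predict_reg = [0.5, 1.1, 1.5, 2.0, 3.5, 5.2]
    labels_reg = [0.5, 1.0, 2.0, 3.0, 4.0, 5.0]
    reg_metrics = RegressionMetrics(zip(predict_reg, labels_reg))
    for name in sorted(reg_metrics.measures):
        print("%s: %.4f" % (name, reg_metrics.evaluate(name)))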