Created
May 12, 2011 07:50
-
-
Save davidrichards/968123 to your computer and use it in GitHub Desktop.
Specificity and Sensitivity: some play-around code I used to understand these two terms.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Confusion
  # Shared construction behavior for confusion-matrix classes: validates and
  # stores the matrix of observed counts (actual outcomes on rows, predicted
  # outcomes on columns).
  module MatrixBehavior
    # A matrix of the values (actual: rows, predicted: columns)
    attr_reader :values

    protected

    # Entry point for including classes to validate their construction options.
    def assert_matrix_behavior(opts)
      assert_values(opts)
    end

    # Coerces opts[:values] into a GSL::Matrix.  Accepts either a nested
    # Array (converted via GSL::Matrix[*rows]) or an existing GSL::Matrix;
    # anything else leaves @values nil.
    #
    # Raises ArgumentError when no usable values are supplied.
    def assert_values(opts)
      values = opts[:values]
      @values = case values
                when Array
                  GSL::Matrix.send(:[], *values)
                when GSL::Matrix
                  values
                end
      raise ArgumentError, "Must provide values" unless self.values
    end
  end

  # A 2x2 confusion matrix for a binary classifier, exposing the standard
  # derived statistics (sensitivity, specificity, predictive values,
  # likelihood ratios, F-measure, odds/risk ratios).
  #
  # Layout convention: rows are predictions, columns are actual outcomes:
  #   [[true_positive,  false_positive],
  #    [false_negative, true_negative]]
  class Binary
    # ============
    # = Behavior =
    # ============
    include MatrixBehavior

    # The labels of the matrix (both top down and left to right)
    attr_reader :labels

    # @param opts [Hash]
    #   :values — nested Array or GSL::Matrix of counts (required)
    #   :labels — two labels for the classes; inferred as [true, false]
    #             when omitted and the matrix is binomial
    # Raises ArgumentError when values or labels cannot be established.
    def initialize(opts={})
      assert_matrix_behavior(opts)
      @labels = opts[:labels]
      infer_labels_from_values
      raise ArgumentError, "Must provide labels" unless self.labels
    end

    # Tests that came back positive and were indeed positive.
    def true_positive
      @true_positive ||= values[0,0]
    end

    # Tests that came back positive but were actually negative.
    def false_positive
      @false_positive ||= values[0,1]
    end
    # False positives are commonly known as type I errors.
    alias :type_1_error :false_positive

    # Tests that came back negative, but were actually positive.
    def false_negative
      @false_negative ||= values[1,0]
    end
    # False negatives are commonly known as type II errors.
    alias :type_2_error :false_negative

    # Tests that came back negative and were indeed negative.
    def true_negative
      @true_negative ||= values[1,1]
    end

    # All events that are actually positive: true positives and false negatives.
    def condition_positive
      @condition_positive ||= true_positive + false_negative
    end

    # All events that are actually negative: false positives and true negatives.
    def condition_negative
      @condition_negative ||= false_positive + true_negative
    end

    # All tests that came back positive: both true positives and false positives.
    def test_outcome_positive
      @test_outcome_positive ||= true_positive + false_positive
    end

    # All tests that came back negative: both true negatives and false negatives.
    def test_outcome_negative
      @test_outcome_negative ||= false_negative + true_negative
    end

    # The percentage of all positives that are detected. So, sensitivity of 0.85 would
    # capture 85% of all positives. This measure is independent of how well the underlying
    # model is at handling negatives.
    #
    # fdiv guarantees float division even if the counts are Integers.
    def sensitivity
      @sensitivity ||= true_positive.fdiv(condition_positive)
    end
    # Another name for sensitivity is power, or statistical power. Again, this is a measure
    # of how well the underlying model is at recognizing positives.
    alias :power :sensitivity
    # Yet another name for sensitivity is recall: the % of positives captured by the
    # underlying model.
    alias :recall :sensitivity

    # Specificity is a measure of how well an underlying model/test/system is at
    # recognizing negatives. It comes back as a percentage. So, negatives captured
    # divided by the total actual negatives.
    def specificity
      @specificity ||= true_negative.fdiv(condition_negative)
    end

    # This is a measure of the positives that are correctly diagnosed.
    # Sensitivity speaks to predicted positives to actual positives, positive
    # predictive value speaks to hits to misses when predicting a positive value.
    # I.e. it's more of an optimism/pessimism measure vs. a grounded to reality measure.
    def positive_predictive_value
      @positive_predictive_value ||= true_positive.fdiv(test_outcome_positive)
    end
    # Another (clearer?) way of saying positive predictive value.
    alias :precision :positive_predictive_value

    # A measure of how accurate the underlying model is at predicting negatives.
    # The true negatives over all negative predictions.
    def negative_predictive_value
      @negative_predictive_value ||= true_negative.fdiv(test_outcome_negative)
    end

    # The rate of false positives among all actual negatives: the complement
    # of specificity.
    def false_positive_rate
      @false_positive_rate ||= 1 - specificity
    end
    # A mathematical way of saying the false positive rate is just alpha.
    alias :alpha :false_positive_rate

    # The rate of false negatives among all actual positives: the complement
    # of sensitivity.
    def false_negative_rate
      @false_negative_rate ||= 1 - sensitivity
    end
    # A mathematical way of saying the false negatives rate is beta.
    alias :beta :false_negative_rate

    # The likelihood ratio of positive predictions to the false positive rate
    # (LR+ = sensitivity / (1 - specificity)).
    def likelihood_ratio_positive
      @likelihood_ratio_positive ||= sensitivity / alpha
    end

    # The likelihood ratio of negative predictions to the false negative rate
    # (LR- = (1 - sensitivity) / specificity).
    def likelihood_ratio_negative
      @likelihood_ratio_negative ||= beta / specificity
    end

    # A single measure of the specificity and sensitivity tests, a harmonic mean of the two.
    def f_measure
      @f_measure ||= 2 * ((precision * recall) / (precision + recall))
    end

    # A total of all tests, whatever the accuracy.
    def sample_size
      @sample_size ||= condition_positive + condition_negative
    end

    # Odds of a positive test in positives relative to negatives:
    # (TP/FP) / (FN/TN).
    def odds_ratio
      @odds_ratio ||= true_positive.fdiv(false_positive) / false_negative.fdiv(true_negative)
    end

    # Risk of a positive outcome in the predicted-positive group relative to
    # the predicted-negative group: (TP/(TP+FP)) / (FN/(FN+TN)).
    def risk_ratio
      @risk_ratio ||= true_positive.fdiv(test_outcome_positive) / false_negative.fdiv(test_outcome_negative)
    end

    protected

    # Defaults @labels to [true, false] when none were supplied and the matrix
    # looks binomial.  NOTE(review): only the row count is checked, matching
    # the original behavior; a 2xN matrix would also pass — confirm intent.
    def infer_labels_from_values
      return true unless self.values.size1 == 2
      @labels ||= [true, false]
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require File.expand_path('../../spec_helper', __FILE__)

# Specs for Confusion::Binary against a known 2x2 matrix:
#   [[20, 180], [10, 1820]]  (predictions on rows, actuals on columns)
describe Confusion::Binary do
  context "when including external behavior" do
    before do
      @options = {
        :labels => [:positive, :negative],
        :values => [[20,180],[10,1820]]
      }
      @model = Confusion::Binary.new(@options)
    end
    it_should_behave_like "a confusion matrix"
  end

  context "when interpreting the matrix" do
    before do
      @options = {
        :labels => [:positive, :negative],
        :values => [[20,180],[10,1820]]
      }
      @bc = Confusion::Binary.new(@options)
    end
    it "should provide a labels reader" do
      @bc.should respond_to(:labels)
    end
    it "should take an optional parameter for labels" do
      @bc.labels.should eql(@options[:labels])
    end
    it "should infer [true, false] for the labels if the data is binomial" do
      @bc = Confusion::Binary.new(:values => [[1,2],[1,2]])
      @bc.labels.should eql([true, false])
    end
    it "should have a true_positive" do
      @bc.true_positive.should eql(20.0)
    end
    it "should have a false_positive" do
      @bc.false_positive.should eql(180.0)
    end
    it "should have a type_1_error" do
      @bc.type_1_error.should eql(180.0)
    end
    it "should have a false_negative" do
      @bc.false_negative.should eql(10.0)
    end
    it "should have a type_2_error" do
      @bc.type_2_error.should eql(10.0)
    end
    it "should have a true_negative" do
      @bc.true_negative.should eql(1820.0)
    end
    it "should have a condition_positive" do
      @bc.condition_positive.should eql(30.0)
    end
    it "should have a condition_negative" do
      @bc.condition_negative.should eql(2000.0)
    end
    it "should have a test_outcome_positive" do
      @bc.test_outcome_positive.should eql(200.0)
    end
    it "should have a test_outcome_negative" do
      @bc.test_outcome_negative.should eql(1830.0)
    end
    it "should have a sensitivity" do
      @bc.sensitivity.should be_within(1.0e-5).of(0.66667)
    end
    it "should have power as an alias of sensitivity" do
      @bc.power.should be_within(1.0e-5).of(0.66667)
    end
    it "should have a specificity" do
      @bc.specificity.should be_within(1.0e-5).of(0.91)
    end
    it "should have a positive_predictive_value" do
      @bc.positive_predictive_value.should be_within(1.0e-5).of(0.1)
    end
    it "should have a negative_predictive_value" do
      @bc.negative_predictive_value.should be_within(1.0e-5).of(0.99453)
    end
    it "should have a false_positive_rate" do
      @bc.false_positive_rate.should be_within(1.0e-5).of(0.09)
    end
    it "should have alpha as an alias for the false_positive_rate" do
      @bc.alpha.should be_within(1.0e-5).of(0.09)
    end
    it "should have a false_negative_rate" do
      @bc.false_negative_rate.should be_within(1.0e-5).of(0.33333)
    end
    it "should have beta as an alias for the false_negative_rate" do
      @bc.beta.should be_within(1.0e-5).of(0.33333)
    end
    it "should have a likelihood_ratio_positive" do
      @bc.likelihood_ratio_positive.should be_within(1.0e-5).of(7.4074)
    end
    it "should have a likelihood_ratio_negative" do
      @bc.likelihood_ratio_negative.should be_within(1.0e-5).of(0.3663)
    end
    it "should have precision as an alias of positive_predictive_value" do
      @bc.precision.should be_within(1.0e-5).of(0.1)
    end
    it "should have recall as an alias to sensitivity" do
      @bc.recall.should be_within(1.0e-5).of(0.66667)
    end
    it "should have the f_measure as a harmonic mean between precision and recall" do
      @bc.f_measure.should be_within(1.0e-5).of(0.17391)
    end
    it "should provide the sample_size" do
      @bc.sample_size.should be_within(1.0e-5).of(2030.0)
    end
    it "should provide an odds_ratio" do
      @bc.odds_ratio.should be_within(1.0e-5).of(20.22222)
    end
    it "should provide a risk_ratio" do
      @bc.risk_ratio.should be_within(1.0e-5).of(18.3)
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment