# davidrichards/pseudo_normalization.rb

## pseudo_normalization.rb
# This is used when the data set's true max and min can't be calculated.
# It provides approximate values for normalization.
# Values are scaled onto 0..1 against an estimated range. A value that falls
# outside the range becomes the bound for its own calculation (so it maps to
# exactly 0 or 1); with :learn enabled the widened range is also remembered.
class PseudoNormalize
  # mathn once made Integer#/ exact, but it left the standard library in
  # Ruby 2.5. Load it when present so legacy callers keep that global
  # behaviour; nothing below depends on it (see #exact_div).
  begin
    require 'mathn'
  rescue LoadError
    # Unavailable on this Ruby; #exact_div provides the exact division.
  end

  class << self
    # One-shot helper: builds a normalizer and normalizes opts[:sample].
    #
    # The sample is used to estimate the range unless both :max and :min are
    # supplied. The caller's hash is left untouched.
    #
    # @param opts [Hash] :sample (values to normalize), optionally :max,
    #   :min and :learn
    # @return [Numeric, Array<Numeric>] a single value for a one-element
    #   sample, otherwise an array
    # @raise [ArgumentError] if neither a sample nor a max and a min is given
    def process(opts={})
      opts = opts.dup # don't delete keys from the caller's hash
      sample = opts.delete(:sample)
      opts[:sample] = sample unless opts[:max] && opts[:min]
      new(opts).process(*sample)
    end
  end

  attr_reader :max, :min, :learn
  attr_accessor :learning_flag

  # @param opts [Hash] either :max and :min, or :sample (an array of numbers
  #   used to estimate them); :learn => true lets the range grow as
  #   out-of-range values are seen
  # @raise [ArgumentError] if no usable range can be established
  def initialize(opts={})
    @max = opts[:max]
    @min = opts[:min]
    @diff = nil
    @learning_flag = false
    @learn = opts.fetch(:learn, false)
    set_max_min(*opts[:sample]) if opts[:sample]
    raise ArgumentError, "Must provide a sample or a max and a min" unless max && min
  end

  # Normalizes one or more values.
  #
  # When learning is on and any value widened the range, everything is
  # normalized a second time so all results share the final range.
  #
  # @return [Numeric, Array<Numeric>] a bare value when exactly one value is
  #   given, otherwise an array
  def process(*values)
    result = values.size == 1 ? normalize(values.first) : values.map { |v| normalize(v) }
    return result unless learning_flag

    self.learning_flag = false
    process(*values)
  end

  # Estimates the range as mean +/- 3 standard deviations of the samples.
  # Accepts either a splatted list of numbers or a single array.
  #
  # @raise [ArgumentError] if fewer than two samples are given (the sample
  #   variance divides by n - 1)
  def set_max_min(*samples)
    samples = samples.flatten
    raise ArgumentError, "Need at least two samples to estimate a range" if samples.size < 2

    center = mean(*samples)
    spread = standard_deviation(*samples) * 3
    @diff = nil # cached width belongs to the old range
    @max = center + spread
    @min = center - spread
  end

  # Normalizes a single value against the current range, stretching the
  # range for this calculation if the value lies outside it.
  #
  # @return [Numeric] exact (Integer or Rational) for integer input, Float
  #   as soon as a Float is involved
  def normalize(v)
    upper = v > max ? v : max
    lower = v < min ? v : min
    width = upper - lower

    if learn
      # Flag the change so #process knows earlier results are out of date.
      self.learning_flag = true if upper != max || lower != min
      @max, @min, @diff = upper, lower, width
    end

    exact_div(v - lower, width)
  end

  protected
    # Width of the current range, cached; pass true to force recalculation.
    def diff(reset=false)
      @diff = nil if reset
      @diff ||= max - min
    end

    # Probably shouldn't use this
    def sigmoid(v)
      1 / (1 + Math::E ** -v)
    end

    # Division that never truncates: Integer operands give a Rational (or an
    # Integer when the quotient is whole), Floats stay Floats. This is the
    # behaviour the code used to get implicitly from mathn.
    def exact_div(numerator, denominator)
      quotient = numerator.quo(denominator)
      return quotient.numerator if quotient.is_a?(Rational) && quotient.denominator == 1
      quotient
    end

    def mean(*samples)
      exact_div(sum(*samples), samples.size)
    end

    # Additive identity matching the samples: 0.0 if any is a Float, else 0.
    def zero(*samples)
      samples.any? { |e| e.is_a?(Float) } ? 0.0 : 0
    end

    def sum(*samples)
      samples.inject(zero(*samples)) { |acc, e| acc + e }
    end

    # Sample variance (divides by n - 1).
    def variance(*samples)
      m = mean(*samples)
      squared = samples.inject(zero(*samples)) { |acc, e| acc + (m - e) ** 2 }
      exact_div(squared, samples.size - 1)
    end

    def standard_deviation(*samples)
      Math.sqrt(variance(*samples))
    end

end

# Examples for PseudoNormalize.
#
# Expected values are written as explicit Rationals (or plain 0 / 1 where the
# quotient is whole) rather than as Integer divisions such as 3/10: those only
# mean "three tenths" while mathn's global Integer#/ patch is loaded, and
# evaluate to 0 everywhere else.
describe PseudoNormalize do

  before do
    @pn = PseudoNormalize.new(:max => 10, :min => 0)
  end

  it "should be able to normalize values with a known max and min" do
    @pn.normalize(3).should eql(Rational(3, 10))
    @pn.normalize(6).should eql(Rational(3, 5))
    @pn.normalize(9).should eql(Rational(9, 10))
  end

  it "should be able to normalize a value higher than the max" do
    # Anything at or above the max becomes the upper bound, hence exactly 1.
    @pn.normalize(10).should eql(1)
    @pn.normalize(11).should eql(1)
    @pn.normalize(15).should eql(1)
  end

  it "should be able to normalize a value lower than the min" do
    # Anything below the min becomes the lower bound, hence exactly 0.
    @pn.normalize(-1).should eql(0)
    @pn.normalize(-4).should eql(0)
  end

  it "should be able to learn the max and the min" do
    @pn = PseudoNormalize.new(:max => 10, :min => 0, :learn => true)
    @pn.learn.should be_true
    @pn.normalize(10).should eql(1)
    @pn.normalize(11).should eql(1)
    # The range grew to 0..11 on the previous call.
    @pn.normalize(10).should eql(Rational(10, 11))
  end

  it "should be able to process a value" do
    @pn.process(5).should eql(Rational(1, 2))
  end

  it "should be able to process more than one value" do
    @pn.process(2,4,6).should eql([Rational(1, 5), Rational(2, 5), Rational(3, 5)])
  end

  it "should be able to accurately process more than one value when the range changes" do
    @pn = PseudoNormalize.new(:max => 10, :min => 0, :learn => true)
    @pn.process(2,4,6,11).should eql([Rational(2, 11), Rational(4, 11), Rational(6, 11), 1])
  end

  it "should be able to set the range based on 3 standard deviations from a mean" do
    # Uniform samples on [0, 1): mean is about 0.5, standard deviation about 0.289.
    @pn.set_max_min(*(1..10_000).map{rand})
    @pn.max.should be_close(1.37, 0.1)
    @pn.min.should be_close(-0.38, 0.1)
  end
end
	# This is used when the data set's true max and min can't be calculated.
	# It provides approximate values for normalization.
	# Values are scaled onto 0..1 against an estimated range. A value that falls
	# outside the range becomes the bound for its own calculation (so it maps to
	# exactly 0 or 1); with :learn enabled the widened range is also remembered.
	class PseudoNormalize
	  # mathn once made Integer#/ exact, but it left the standard library in
	  # Ruby 2.5. Load it when present so legacy callers keep that global
	  # behaviour; nothing below depends on it (see #exact_div).
	  begin
	    require 'mathn'
	  rescue LoadError
	    # Unavailable on this Ruby; #exact_div provides the exact division.
	  end

	  class << self
	    # One-shot helper: builds a normalizer and normalizes opts[:sample].
	    #
	    # The sample is used to estimate the range unless both :max and :min are
	    # supplied. The caller's hash is left untouched.
	    #
	    # @param opts [Hash] :sample (values to normalize), optionally :max,
	    #   :min and :learn
	    # @return [Numeric, Array<Numeric>] a single value for a one-element
	    #   sample, otherwise an array
	    # @raise [ArgumentError] if neither a sample nor a max and a min is given
	    def process(opts={})
	      opts = opts.dup # don't delete keys from the caller's hash
	      sample = opts.delete(:sample)
	      opts[:sample] = sample unless opts[:max] && opts[:min]
	      new(opts).process(*sample)
	    end
	  end

	  attr_reader :max, :min, :learn
	  attr_accessor :learning_flag

	  # @param opts [Hash] either :max and :min, or :sample (an array of numbers
	  #   used to estimate them); :learn => true lets the range grow as
	  #   out-of-range values are seen
	  # @raise [ArgumentError] if no usable range can be established
	  def initialize(opts={})
	    @max = opts[:max]
	    @min = opts[:min]
	    @diff = nil
	    @learning_flag = false
	    @learn = opts.fetch(:learn, false)
	    set_max_min(*opts[:sample]) if opts[:sample]
	    raise ArgumentError, "Must provide a sample or a max and a min" unless max && min
	  end

	  # Normalizes one or more values.
	  #
	  # When learning is on and any value widened the range, everything is
	  # normalized a second time so all results share the final range.
	  #
	  # @return [Numeric, Array<Numeric>] a bare value when exactly one value is
	  #   given, otherwise an array
	  def process(*values)
	    result = values.size == 1 ? normalize(values.first) : values.map { |v| normalize(v) }
	    return result unless learning_flag

	    self.learning_flag = false
	    process(*values)
	  end

	  # Estimates the range as mean +/- 3 standard deviations of the samples.
	  # Accepts either a splatted list of numbers or a single array.
	  #
	  # @raise [ArgumentError] if fewer than two samples are given (the sample
	  #   variance divides by n - 1)
	  def set_max_min(*samples)
	    samples = samples.flatten
	    raise ArgumentError, "Need at least two samples to estimate a range" if samples.size < 2

	    center = mean(*samples)
	    spread = standard_deviation(*samples) * 3
	    @diff = nil # cached width belongs to the old range
	    @max = center + spread
	    @min = center - spread
	  end

	  # Normalizes a single value against the current range, stretching the
	  # range for this calculation if the value lies outside it.
	  #
	  # @return [Numeric] exact (Integer or Rational) for integer input, Float
	  #   as soon as a Float is involved
	  def normalize(v)
	    upper = v > max ? v : max
	    lower = v < min ? v : min
	    width = upper - lower

	    if learn
	      # Flag the change so #process knows earlier results are out of date.
	      self.learning_flag = true if upper != max || lower != min
	      @max, @min, @diff = upper, lower, width
	    end

	    exact_div(v - lower, width)
	  end

	  protected
	    # Width of the current range, cached; pass true to force recalculation.
	    def diff(reset=false)
	      @diff = nil if reset
	      @diff ||= max - min
	    end

	    # Probably shouldn't use this
	    def sigmoid(v)
	      1 / (1 + Math::E ** -v)
	    end

	    # Division that never truncates: Integer operands give a Rational (or an
	    # Integer when the quotient is whole), Floats stay Floats. This is the
	    # behaviour the code used to get implicitly from mathn.
	    def exact_div(numerator, denominator)
	      quotient = numerator.quo(denominator)
	      return quotient.numerator if quotient.is_a?(Rational) && quotient.denominator == 1
	      quotient
	    end

	    def mean(*samples)
	      exact_div(sum(*samples), samples.size)
	    end

	    # Additive identity matching the samples: 0.0 if any is a Float, else 0.
	    def zero(*samples)
	      samples.any? { |e| e.is_a?(Float) } ? 0.0 : 0
	    end

	    def sum(*samples)
	      samples.inject(zero(*samples)) { |acc, e| acc + e }
	    end

	    # Sample variance (divides by n - 1): squared, not doubled, deviations.
	    def variance(*samples)
	      m = mean(*samples)
	      squared = samples.inject(zero(*samples)) { |acc, e| acc + (m - e) ** 2 }
	      exact_div(squared, samples.size - 1)
	    end

	    def standard_deviation(*samples)
	      Math.sqrt(variance(*samples))
	    end

	end

	# Examples for PseudoNormalize.
	#
	# Expected values are written as explicit Rationals (or plain 0 / 1 where the
	# quotient is whole) rather than as Integer divisions such as 3/10: those only
	# mean "three tenths" while mathn's global Integer#/ patch is loaded, and
	# evaluate to 0 everywhere else.
	describe PseudoNormalize do

	  before do
	    @pn = PseudoNormalize.new(:max => 10, :min => 0)
	  end

	  it "should be able to normalize values with a known max and min" do
	    @pn.normalize(3).should eql(Rational(3, 10))
	    @pn.normalize(6).should eql(Rational(3, 5))
	    @pn.normalize(9).should eql(Rational(9, 10))
	  end

	  it "should be able to normalize a value higher than the max" do
	    # Anything at or above the max becomes the upper bound, hence exactly 1.
	    @pn.normalize(10).should eql(1)
	    @pn.normalize(11).should eql(1)
	    @pn.normalize(15).should eql(1)
	  end

	  it "should be able to normalize a value lower than the min" do
	    # Anything below the min becomes the lower bound, hence exactly 0.
	    @pn.normalize(-1).should eql(0)
	    @pn.normalize(-4).should eql(0)
	  end

	  it "should be able to learn the max and the min" do
	    @pn = PseudoNormalize.new(:max => 10, :min => 0, :learn => true)
	    @pn.learn.should be_true
	    @pn.normalize(10).should eql(1)
	    @pn.normalize(11).should eql(1)
	    # The range grew to 0..11 on the previous call.
	    @pn.normalize(10).should eql(Rational(10, 11))
	  end

	  it "should be able to process a value" do
	    @pn.process(5).should eql(Rational(1, 2))
	  end

	  it "should be able to process more than one value" do
	    @pn.process(2,4,6).should eql([Rational(1, 5), Rational(2, 5), Rational(3, 5)])
	  end

	  it "should be able to accurately process more than one value when the range changes" do
	    @pn = PseudoNormalize.new(:max => 10, :min => 0, :learn => true)
	    @pn.process(2,4,6,11).should eql([Rational(2, 11), Rational(4, 11), Rational(6, 11), 1])
	  end

	  it "should be able to set the range based on 3 standard deviations from a mean" do
	    # Uniform samples on [0, 1): mean is about 0.5, standard deviation about 0.289.
	    @pn.set_max_min(*(1..10_000).map{rand})
	    @pn.max.should be_close(1.37, 0.1)
	    @pn.min.should be_close(-0.38, 0.1)
	  end
	end