Skip to content

Instantly share code, notes, and snippets.

@davidrichards
Created October 16, 2009 01:55
Show Gist options
  • Save davidrichards/211471 to your computer and use it in GitHub Desktop.
Save davidrichards/211471 to your computer and use it in GitHub Desktop.
# Approximates min-max normalization for a data set whose true max and min
# can't be calculated.
#
# The range comes either from explicit :max and :min options or from a
# sample, in which case it is estimated as the sample mean plus and minus
# three sample standard deviations. With :learn => true the range widens
# whenever a value outside of it is normalized.
class PseudoNormalize
  # mathn made Integer division exact for the whole process, but it was
  # deprecated in Ruby 2.2 and removed from the standard library in Ruby 2.5.
  # It is still loaded when available so code relying on that side effect
  # keeps working; the class itself no longer needs it (see exact_divide).
  begin
    require 'mathn'
  rescue LoadError
    # mathn is not installed; nothing in this class depends on it.
  end

  class << self
    # Builds a normalizer from +opts+ and normalizes opts[:sample] with it.
    #
    # The sample also defines the range unless both :max and :min are given.
    # The caller's hash is left untouched.
    #
    # Returns whatever #process returns for the sample values.
    # Raises ArgumentError when no range can be established.
    def process(opts = {})
      opts = opts.dup
      sample = opts.delete(:sample)
      opts[:sample] = sample unless opts[:max] && opts[:min]
      new(opts).process(*sample)
    end
  end

  attr_reader :max, :min, :learn
  attr_accessor :learning_flag

  # opts:
  #   :max, :min - explicit range bounds
  #   :sample    - values used to estimate the range (overrides :max/:min)
  #   :learn     - when true, out-of-range values widen the range
  #
  # Raises ArgumentError unless a max and a min end up being set.
  def initialize(opts = {})
    @max = opts[:max] if opts[:max]
    @min = opts[:min] if opts[:min]
    @learn = opts.fetch(:learn, false)
    # set_max_min takes the values as separate arguments, so splat the sample.
    set_max_min(*Array(opts[:sample])) if opts[:sample]
    return if max && min

    raise ArgumentError, "Must provide a sample or a max and a min"
  end

  # Normalizes one value (returning a single result) or several values
  # (returning an array of results).
  def process(*values)
    result = if values.size == 1
               normalize(values.first)
             else
               values.map { |v| normalize(v) }
             end
    return result unless learning_flag

    # The range changed part-way through, so earlier results were computed
    # against the old range. Recalculate everything with the final range.
    self.learning_flag = false
    process(*values)
  end

  # Estimates the range from the samples.
  # I don't know, 3 standard deviations ought to do it...
  def set_max_min(*samples)
    center = mean(*samples)
    spread = standard_deviation(*samples) * 3
    @diff = nil # drop the cached width; it belongs to the previous range
    @max = center + spread
    @min = center - spread
  end

  # Returns (v - min) / (max - min), after stretching the range to include v.
  # The stretched range is stored only when learning is enabled.
  #
  # Integer inputs give an exact Rational (or an Integer for whole results)
  # rather than a truncated quotient. A zero-width range raises
  # ZeroDivisionError for Integer inputs.
  def normalize(v)
    # Get the true max, min, and diff for this value
    upper = v > max ? v : max
    lower = v < min ? v : min
    span = upper - lower

    if learn
      # Flag a range change so #process knows it has to recalculate
      self.learning_flag = true if upper != max || lower != min
      @max, @min, @diff = upper, lower, span
    end

    exact_divide(v - lower, span)
  end

  protected

  # Width of the current range, cached until reset or until the range changes.
  def diff(reset = false)
    @diff = nil if reset
    @diff ||= max - min
  end

  # Probably shouldn't use this
  def sigmoid(v)
    1 / (1 + Math::E**-v)
  end

  def mean(*samples)
    exact_divide(sum(*samples), samples.size)
  end

  # Additive identity matching the samples: 0.0 if any is a Float, else 0.
  def zero(*samples)
    samples.any? { |e| e.is_a?(Float) } ? 0.0 : 0
  end

  def sum(*samples)
    samples.inject(zero(*samples)) { |total, e| total + e }
  end

  # Sample variance (divides by n - 1).
  def variance(*samples)
    center = mean(*samples)
    squared = samples.inject(zero(*samples)) { |total, e| total + (center - e)**2 }
    exact_divide(squared, samples.size - 1)
  end

  def standard_deviation(*samples)
    Math.sqrt(variance(*samples))
  end

  # Division that never truncates: Integer operands give a Rational, which is
  # collapsed to an Integer when it is a whole number. This matches what plain
  # `/` produced under mathn, without needing mathn's global patch.
  def exact_divide(numerator, denominator)
    quotient = numerator.quo(denominator)
    if quotient.is_a?(Rational) && quotient.denominator == 1
      quotient.numerator
    else
      quotient
    end
  end
end
# Specs for PseudoNormalize.
#
# Fractional expectations are written as explicit Rational values. Integer
# division such as 3/10 is only a fraction while mathn's global patch is
# loaded; in plain Ruby it is 0, which would make the expectations wrong.
# Whole-number expectations are written as the Integers they evaluate to.
#
# NOTE(review): should / be_true / be_close are kept as the file's existing
# RSpec dialect; confirm the RSpec version in use before changing matchers.
describe PseudoNormalize do
  before do
    @pn = PseudoNormalize.new(:max => 10, :min => 0)
  end

  it "should be able to normalize values with a known max and min" do
    @pn.normalize(3).should eql(Rational(3, 10))
    @pn.normalize(6).should eql(Rational(6, 10))
    @pn.normalize(9).should eql(Rational(9, 10))
  end

  it "should be able to normalize a value higher than the max" do
    # Without learning, a value above the max is treated as the max itself.
    @pn.normalize(10).should eql(1)
    @pn.normalize(11).should eql(1)
    @pn.normalize(15).should eql(1)
  end

  it "should be able to normalize a value lower than the min" do
    # Without learning, a value below the min is treated as the min itself.
    @pn.normalize(-1).should eql(0)
    @pn.normalize(-4).should eql(0)
  end

  it "should be able to learn the max and the min" do
    @pn = PseudoNormalize.new(:max => 10, :min => 0, :learn => true)
    @pn.learn.should be_true
    @pn.normalize(10).should eql(1)
    @pn.normalize(11).should eql(1)
    # The max is now 11, so 10 is no longer the top of the range.
    @pn.normalize(10).should eql(Rational(10, 11))
  end

  it "should be able to process a value" do
    @pn.process(5).should eql(Rational(5, 10))
  end

  it "should be able to process more than one value" do
    @pn.process(2, 4, 6).should eql([Rational(2, 10), Rational(4, 10), Rational(6, 10)])
  end

  it "should be able to accurately process more than one value when the range changes" do
    @pn = PseudoNormalize.new(:max => 10, :min => 0, :learn => true)
    @pn.process(2, 4, 6, 11).should eql([Rational(2, 11), Rational(4, 11), Rational(6, 11), 1])
  end

  it "should be able to set the range based on 3 standard deviations from a mean" do
    # Uniform samples on [0, 1): mean is about 0.5, standard deviation about 0.29.
    @pn.set_max_min(*(1..10_000).map { rand })
    @pn.max.should be_close(1.37, 0.1)
    @pn.min.should be_close(-0.38, 0.1)
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment