Skip to content

Instantly share code, notes, and snippets.

@bogdan
Created February 5, 2016 13:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bogdan/10c03a4d1b64e90804fc to your computer and use it in GitHub Desktop.
Save bogdan/10c03a4d1b64e90804fc to your computer and use it in GitHub Desktop.
module AbMath
module Distribution
SQ2PI = Math.sqrt(2 * Math::PI)
class << self
def mean(distribution)
# average value
return 0.0 if distribution.empty?
distribution.reduce(:+).to_f / distribution.size
end
# https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation
def standard_deviation(distribution)
return 0.0 if distribution.empty?
d_size = distribution.size
mean = distribution.reduce(:+).to_f / d_size
sum_diff2 = distribution.inject(0) { |sum, value| sum + (value.to_f - mean)**2 }
variance = sum_diff2 / d_size
Math.sqrt variance
end
# https://en.wikipedia.org/wiki/Standard_error
def standard_error(distribution)
return 0.0 if distribution.empty?
standard_deviation(distribution) / Math.sqrt(distribution.size)
end
# https://en.wikipedia.org/wiki/Standard_normal_table
def quantile(qn)
b = [1.570796288, 0.03706987906, -0.8364353589e-3,
-0.2250947176e-3, 0.6841218299e-5, 0.5824238515e-5,
-0.104527497e-5, 0.8360937017e-7, -0.3231081277e-8,
0.3657763036e-10, 0.6936233982e-12]
raise ArgumentError, "Argument must be in range [0.0, 1.0]" if qn < 0.0 || qn > 1.0
return 0.0 if qn == 0.5
w1 = qn
qn > 0.5 && w1 = 1.0 - w1
w3 = -Math.log(4.0 * w1 * (1.0 - w1))
w1 = b[0]
1.upto(b.size-1) do |i|
w1 += b[i] * w3**i
end
qn > 0.5 ? Math.sqrt(w1 * w3) : -Math.sqrt(w1 * w3)
end
# Normal cumulative distribution function
# Returns the integral of normal distribution over (-Infty, z].
def cdf(z_score)
return 0.0 if z_score < -12
return 1.0 if z_score > 12
return 0.5 if z_score == 0.0
z = z_score.to_f.abs
z2 = z**2
t = q = z * Math.exp(-0.5 * z2) / SQ2PI
3.step(199, 2) do |i|
prev = q
t *= z2 / i
q += t
return (z_score > 0.0 ? 0.5 + q : 0.5 - q) if q <= prev
end
z_score > 0.0 ? 1.0 : 0.0
end
end
end
def standard_deviation
positive_size = (size*rate).to_i
negative_size = size - positive_size
# First variant
# distribution = [1]*positive_size + [0]*negative_size
# AbMath::Distribution.standard_deviation distribution
# Second faster variant
sum_of_diff = positive_size*((1-rate)**2) + negative_size*((0-rate)**2)
Math.sqrt (sum_of_diff / size)
end
def standard_error
# First Variant
# standard_deviation / Math.sqrt(size)
# Second faster Variant
if rate > 1
ApplicationErrors.notify_developers "Invalid rate for hypothesis detected"
return 0.0
end
Math.sqrt(rate * (1 - rate) / size.to_f)
end
def p_value(z_score = nil)
cdf = AbMath::Distribution.cdf(z_score || standard_deviation)
return cdf < 0.5 ? 1.0 - cdf : cdf
end
def confidence(competitor)
return 0.5 if rate == competitor.rate
normalized_standard_error = Math.sqrt(standard_error**2 + competitor.standard_error**2)
return 0.5 if normalized_standard_error.nan?
return 0.5 if rate.zero? and competitor.rate.zero?
z_score = (rate - competitor.rate) / normalized_standard_error
p_value(z_score)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment