-
-
Save katanacrimson/09c43aef2e381e3d61194a6878bd2063 to your computer and use it in GitHub Desktop.
MySQL stored functions for calculating confidence intervals using Wilson scores: more precisely "Lower bound of Wilson score confidence interval for a Bernoulli parameter". Includes a P(qn) function (inverse normal distribution). See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html This describes a way to rank items based on a ve…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Francis Avila 2009-07-04 | |
# See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html | |
# These functions calculate "Lower bound of Wilson score confidence interval for a Bernoulli parameter" | |
# Two stored functions: | |
# CI_LOWER_BOUND(pos, trials, confidence) calculate a wilson score given POS positive votes in TRIALS number of trials | |
# PNORM(qn) calculate P(qn) (inverse normal distribution); needed by CI_LOWER_BOUND | |
delimiter ;; | |
CREATE DEFINER=`root`@`localhost` FUNCTION `ci_lower_bound`(pos INTEGER, trials INTEGER, confidence DOUBLE) RETURNS double | |
NO SQL | |
DETERMINISTIC | |
SQL SECURITY INVOKER | |
COMMENT 'Return the confidence interval lower bound (wilson score) for POS positive votes in TRIALS trials, with a desired confidence of CONFIDENCE' | |
BEGIN | |
DECLARE z, phat DOUBLE; | |
IF pos=0 THEN | |
RETURN 0.0; | |
ELSEIF pos > trials THEN | |
RETURN NULL; | |
END IF; | |
SET z = PNORM(1-confidence/2); | |
SET phat = 1.0*pos/trials; | |
RETURN (phat+z*z/(2*trials) - z * SQRT((phat*(1-phat)+z*z/(4*trials))/trials))/(1+z*z/trials); | |
END; | |
;; | |
delimiter ; | |
delimiter ;; | |
CREATE DEFINER=`root`@`localhost` FUNCTION `pnorm`(qn DOUBLE) RETURNS double | |
NO SQL | |
DETERMINISTIC | |
SQL SECURITY INVOKER | |
COMMENT 'Inverse gaussian distribution (P normal distribution)' | |
BEGIN | |
DECLARE w3 DOUBLE; | |
DECLARE w1 DOUBLE DEFAULT 1.570796288; | |
DECLARE ai DOUBLE DEFAULT 0.03706987906; | |
DECLARE i TINYINT UNSIGNED DEFAULT 2; | |
IF qn<=0.0 or qn>=1.0 THEN | |
RETURN NULL; | |
ELSEIF qn=0.5 THEN | |
RETURN 0.0; | |
END IF; | |
SET w3 = -LN(4.0*qn*(1.0-qn)); | |
SET w1 = w1 + ai*w3; | |
REPEAT | |
SET ai = CASE i | |
WHEN 2 THEN -0.8364353589e-3 | |
WHEN 3 THEN -0.2250947176e-3 | |
WHEN 4 THEN 0.6841218299e-5 | |
WHEN 5 THEN 0.5824238515e-5 | |
WHEN 6 THEN -0.104527497e-5 | |
WHEN 7 THEN 0.8360937017e-7 | |
WHEN 8 THEN -0.3231081277e-8 | |
WHEN 9 THEN 0.3657763036e-10 | |
WHEN 10 THEN 0.6936233982e-12 | |
END; | |
SET w1 = w1 + (ai * POWER(w3, i)), i = i+1; | |
UNTIL i=11 END REPEAT; | |
RETURN IF(qn<0.5,-1.0,1.0)*SQRT(w1*w3); | |
END; | |
;; | |
delimiter ; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment