Skip to content

Instantly share code, notes, and snippets.

@katanacrimson
Forked from favila/ci_lower_bound.sql
Created July 6, 2017 15:55
Show Gist options
  • Save katanacrimson/09c43aef2e381e3d61194a6878bd2063 to your computer and use it in GitHub Desktop.
Save katanacrimson/09c43aef2e381e3d61194a6878bd2063 to your computer and use it in GitHub Desktop.
MySQL stored functions for calculating confidence intervals using Wilson scores: more precisely "Lower bound of Wilson score confidence interval for a Bernoulli parameter". Includes a P(qn) function (inverse normal distribution). See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html This describes a way to rank items based on a ve…
# Francis Avila 2009-07-04
# See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
# These functions calculate "Lower bound of Wilson score confidence interval for a Bernoulli parameter"
# Two stored functions:
# CI_LOWER_BOUND(pos, trials, confidence) calculate a wilson score given POS positive votes in TRIALS number of trials
# PNORM(qn) calculate P(qn) (inverse normal distribution); needed by CI_LOWER_BOUND
delimiter ;;
CREATE DEFINER=`root`@`localhost` FUNCTION `ci_lower_bound`(pos INTEGER, trials INTEGER, confidence DOUBLE) RETURNS double
NO SQL
DETERMINISTIC
SQL SECURITY INVOKER
COMMENT 'Return the confidence interval lower bound (wilson score) for POS positive votes in TRIALS trials, with a desired confidence of CONFIDENCE'
BEGIN
DECLARE z, phat DOUBLE;
IF pos=0 THEN
RETURN 0.0;
ELSEIF pos > trials THEN
RETURN NULL;
END IF;
SET z = PNORM(1-confidence/2);
SET phat = 1.0*pos/trials;
RETURN (phat+z*z/(2*trials) - z * SQRT((phat*(1-phat)+z*z/(4*trials))/trials))/(1+z*z/trials);
END;
;;
delimiter ;
delimiter ;;
CREATE DEFINER=`root`@`localhost` FUNCTION `pnorm`(qn DOUBLE) RETURNS double
NO SQL
DETERMINISTIC
SQL SECURITY INVOKER
COMMENT 'Inverse gaussian distribution (P normal distribution)'
BEGIN
DECLARE w3 DOUBLE;
DECLARE w1 DOUBLE DEFAULT 1.570796288;
DECLARE ai DOUBLE DEFAULT 0.03706987906;
DECLARE i TINYINT UNSIGNED DEFAULT 2;
IF qn<=0.0 or qn>=1.0 THEN
RETURN NULL;
ELSEIF qn=0.5 THEN
RETURN 0.0;
END IF;
SET w3 = -LN(4.0*qn*(1.0-qn));
SET w1 = w1 + ai*w3;
REPEAT
SET ai = CASE i
WHEN 2 THEN -0.8364353589e-3
WHEN 3 THEN -0.2250947176e-3
WHEN 4 THEN 0.6841218299e-5
WHEN 5 THEN 0.5824238515e-5
WHEN 6 THEN -0.104527497e-5
WHEN 7 THEN 0.8360937017e-7
WHEN 8 THEN -0.3231081277e-8
WHEN 9 THEN 0.3657763036e-10
WHEN 10 THEN 0.6936233982e-12
END;
SET w1 = w1 + (ai * POWER(w3, i)), i = i+1;
UNTIL i=11 END REPEAT;
RETURN IF(qn<0.5,-1.0,1.0)*SQRT(w1*w3);
END;
;;
delimiter ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment