Skip to content

Instantly share code, notes, and snippets.

Created April 9, 2013 19:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save anonymous/5348760 to your computer and use it in GitHub Desktop.
Save anonymous/5348760 to your computer and use it in GitHub Desktop.
MySQL stored functions for calculating confidence intervals using Wilson scores; and a P(qn) function for inverse normal distributions. See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html This describes a way to rank items based on a very small number of votes. Two stored functions: * CI_LOWER_BOUND(pos, trials, confidence) calc…
# Francis Avila 2009-07-04
# See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
# Two stored functions:
# CI_LOWER_BOUND(pos, trials, confidence) calculate a wilson score given POS positive votes in TRIALS number of trials
# PNORM(qn) calculate P(qn) (inverse normal distribution); needed by CI_LOWER_BOUND
delimiter ;;
CREATE DEFINER=`root`@`localhost` FUNCTION `ci_lower_bound`(pos INTEGER, trials INTEGER, confidence DOUBLE) RETURNS double
NO SQL
DETERMINISTIC
SQL SECURITY INVOKER
COMMENT 'Return the confidence interval lower bound (wilson score) for POS positive votes in TRIALS trials, with a desired confidence of CONFIDENCE'
BEGIN
DECLARE z, phat DOUBLE;
IF pos=0 THEN
RETURN 0.0;
ELSEIF pos > trials THEN
RETURN NULL;
END IF;
SET z = PNORM(1-confidence/2);
SET phat = 1.0*pos/trials;
RETURN (phat+z*z/(2*trials) - z * SQRT((phat*(1-phat)+z*z/(4*trials))/trials))/(1+z*z/trials);
END;
;;
delimiter ;
delimiter ;;
CREATE DEFINER=`root`@`localhost` FUNCTION `pnorm`(qn DOUBLE) RETURNS double
NO SQL
DETERMINISTIC
SQL SECURITY INVOKER
COMMENT 'Inverse gaussian distribution (P normal distribution)'
BEGIN
DECLARE w3 DOUBLE;
DECLARE w1 DOUBLE DEFAULT 1.570796288;
DECLARE ai DOUBLE DEFAULT 0.03706987906;
DECLARE i TINYINT UNSIGNED DEFAULT 2;
IF qn<=0.0 or qn>=1.0 THEN
RETURN NULL;
ELSEIF qn=0.5 THEN
RETURN 0.0;
END IF;
SET w3 = -LN(4.0*qn*(1.0-qn));
SET w1 = w1 + ai*w3;
REPEAT
SET ai = CASE i
WHEN 2 THEN -0.8364353589e-3
WHEN 3 THEN -0.2250947176e-3
WHEN 4 THEN 0.6841218299e-5
WHEN 5 THEN 0.5824238515e-5
WHEN 6 THEN -0.104527497e-5
WHEN 7 THEN 0.8360937017e-7
WHEN 8 THEN -0.3231081277e-8
WHEN 9 THEN 0.3657763036e-10
WHEN 10 THEN 0.6936233982e-12
END;
SET w1 = w1 + (ai * POWER(w3, i)), i = i+1;
UNTIL i=11 END REPEAT;
RETURN IF(qn<0.5,-1.0,1.0)*SQRT(w1*w3);
END;
;;
delimiter ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment