Functions for statistics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Functions for statistics | |
import Data.List | |
-- Averages | |
average p = (sum p) / (fromIntegral $ length p) -- arithmetic mean | |
geomean p = (product p) ** (1.0 / (fromIntegral $ length p)) -- geometric mean | |
harmean p = (fromIntegral $ length p) / (sum $ map (\x -> 1.0 / x) p) -- harmonic mean | |
rms p = sqrt $ average $ map (\x -> x * x) p -- quadratic mean (RMS) | |
-- Quantile | |
quantile q p | 0 <= q && q <= 1 -- any quantile | |
= f p $ fromIntegral 1 - q + q * (fromIntegral $ length p) | |
where f p x | ceiling x /= floor x = ((fromIntegral $ ceiling x) - x) * (sort p !! (floor x - 1)) | |
+ (x - (fromIntegral $ floor x)) * (sort p !! (ceiling x - 1)) | |
| otherwise = sort p !! (truncate x - 1) | |
quantile q _ | otherwise = 0/0 -- out of range: intentionally returns NaN | |
median p = quantile 0.50 p -- median | |
quart 0 p = quantile 0.00 p -- zeroth quartile (minimum) | |
quart 1 p = quantile 0.25 p -- first quartile | |
quart 2 p = quantile 0.50 p -- second quartile (median) | |
quart 3 p = quantile 0.75 p -- third quartile | |
quart 4 p = quantile 1.00 p -- fourth quartile (maximum) | |
quart _ _ = 0/0 -- invalid: intentionally returns NaN | |
lquart p = quart 1 p -- lower (first) quartile | |
uquart p = quart 3 p -- upper (third) quartile | |
iqr p = uquart p - lquart p -- interquartile range (IQR), or middle fifty | |
-- Moments | |
var p = average $ map (\x -> (x - average p) ** 2) p -- variance | |
stddev p = sqrt $ var p -- standard deviation | |
moment q p = average $ map (\x -> (x - average p) ** q) p -- moment | |
skew p = moment 3 p / (stddev p ** 3) -- skewness | |
kurt p = moment 4 p / (stddev p ** 4) - 3 -- (excess) kurtosis | |
-- Functions for outlier detection | |
inSigma q p x = abs ((x - average p) / stddev p) <= q -- check if given x in population p is within q times sigma | |
in2sigma = inSigma 2 | |
in3sigma = inSigma 3 | |
filterSigma q p = filter (inSigma q p) p -- from population p drop members which is out of q times sigma | |
filter2sigma = filterSigma 2 | |
filter3sigma = filterSigma 3 | |
inIqr q p x = x <= (uquart p + q * iqr p) && x >= (lquart p - q * iqr p) -- check if given x in population p is within q times IQR | |
in1_5iqr = inIqr 1.5 | |
filterIqr q p = filter (inIqr q p) p -- from population p drop members which is out of q times IQR | |
filter1_5iqr = filterIqr 1.5 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment