Skip to content

Instantly share code, notes, and snippets.

@gavinwhyte
Last active September 7, 2015 05:52
Show Gist options
  • Save gavinwhyte/7023ccf7a272179bdaab to your computer and use it in GitHub Desktop.
Save gavinwhyte/7023ccf7a272179bdaab to your computer and use it in GitHub Desktop.
Pearson Correlation Coefficient.
-- Create a new file, which we will call Main.hs
main :: IO ()
main = do
let d1 = [3,3,3,4,4,4,5,5,5]
let d2 = [1,1,2,2,3,4,4,5,5]
let r = pearson d1 d2
print r
pearson xs ys = (n * sumXY - sumX * sumY) /
sqrt ( (n * sumX2 - sumX*sumX) *
(n * sumY2 - sumY*sumY) )
where n = fromIntegral (length xs)
sumX = sum xs
sumY = sum ys
sumX2 = sum $ zipWith (*) xs xs
sumY2 = sum $ zipWith (*) ys ys
sumXY = sum $ zipWith (*) xs ys
@jberthold
Copy link

Thought you might like this...

-- module PearsonScan where

import Control.Monad
import System.Environment
import System.IO
import Data.List(inits)

import qualified Data.ByteString.Lazy as B

main = do
  args <- getArgs
  when (null args) (error "please provide an input file name")
  let twice f a b = (f a, f b)
  xs   <- B.readFile (head args) -- file is read lazily
  ys   <- B.getContents          -- reads stdin lazily
  let input = B.zipWith (twice fromIntegral) xs ys
  let results = uncurry pearsonScan (unzip input)
  mapM_ (putStrLn . show) $ results
pearsonScan :: [Double] -> [Double] -> [Double]
pearsonScan xs ys
    = map (uncurry6 mkPearson)
          (zip6 [1..] sumXs sumYs sumXYs sumX2s sumY2s)
      where sumXs  = scanl1 (+) xs
            sumYs  = scanl1 (+) ys
            sumXYs = mulSum xs ys
            sumX2s = mulSum xs xs
            sumY2s = mulSum ys ys
            mkPearson n sumX sumY sumXY sumX2 sumY2
                   =  (n * sumXY - sumX * sumY) /
                      sqrt ( (n * sumX2 - sumX*sumX) *
                             (n * sumY2 - sumY*sumY) )

mulSum as bs = scanl1 (+) (zipWith (*) as bs)

uncurry6 :: (a->b->c->d->e->f->g) -> (((((a,b),c),d),e),f) -> g
uncurry6 =  uncurry . uncurry . uncurry . uncurry . uncurry
zip6 :: [a] -> [b] -> [c] -> [d] -> [e] -> [f] -> [(((((a,b),c),d),e),f)]
zip6 as bs cs ds es fs -- = foldl1 zip [as,bs,cs,ds,es,fs] --nope
    = zip (zip (zip (zip (zip as bs) cs) ds) es) fs

---------------------old code-----------------
oldPearson xs ys = (n * sumXY - sumX * sumY) /
                sqrt ( (n * sumX2 - sumX*sumX) *
                       (n * sumY2 - sumY*sumY) )
    where n = fromIntegral (length xs)
          sumX = sum xs
          sumY = sum ys
          sumX2 = sum $ zipWith (*) xs xs
          sumY2 = sum $ zipWith (*) ys ys
          sumXY = sum $ zipWith (*) xs ys

oldmain = do
         let d1 = [3,3,3,4,4,4,5,5,5]
             d2 = [1,1,2,2,3,4,4,5,5]
             r  = oldPearson d1 d2
         print r
-----------------------------------------------
pearsonNoScan xs ys = map (uncurry oldPearson) (zip (inits xs) (inits ys))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment