Skip to content

Instantly share code, notes, and snippets.

Created May 1, 2014 21:10
Show Gist options
  • Save anonymous/11461819 to your computer and use it in GitHub Desktop.
Save anonymous/11461819 to your computer and use it in GitHub Desktop.
module Clustering
import Data.Floats
-- Feature Normalization
-- {{{
foldl1List : (a -> a -> a) -> List a -> a
foldl1List func (x :: xs) =
case xs of
[] => x
y :: ys => foldl1List func ((func x y) :: ys)
foldl1Vect : (a -> a -> a) -> Vect (S n) a -> a
foldl1Vect func (x :: []) = x
foldl1Vect func (x :: (y :: xs)) = foldl1Vect func ((func x y) :: xs)
maximum : Ord a => Vect (S n) a -> a
maximum = foldl1Vect max
minimum : Ord a => Vect (S n) a -> a
minimum = foldl1Vect min
normalizingFunc : Vect (S n) Float -> Float -> Float
normalizingFunc vct val = (val - mean) / (range / 2) where
range = (maximum vct) - (minimum vct)
mean = ((maximum vct) + (minimum vct)) / 2
normalizeVect : Vect (S n) Float -> Vect (S n) Float
normalizeVect lst = map (normalizingFunc lst) lst
-- }}}
-- Gravity
-- {{{
difference : Vect n Float -> Vect n Float -> Vect n Float
difference [] [] = []
difference (x :: xs) (y :: ys) = (x - y) :: (difference xs ys)
square : Float -> Float
square num = pow num 2
distance : Vect (S n) Float -> Vect (S n) Float -> Float
distance point1 point2 = sqrt $ sum $ map square (difference point1 point2)
direction : Vect (S n) Float -> Vect (S n) Float -> Vect (S n) Float
direction point1 point2 = map (/ distance point1 point2) (difference point1 point2)
gravityVector : Vect (S n) Float -> Vect (S n) Float -> Vect (S n) Float
gravityVector point1 point2 = map (* (k / (distance point1 point2))) (direction point1 point2) where
k = 1.0 -- Change this parameter
vectAdd : Vect n Float -> Vect n Float -> Vect n Float
vectAdd [] [] = []
vectAdd (x :: xs) (y :: ys) = (x + y) :: vectAdd xs ys
gravityList : Vect (S n) Float -> List (Vect (S n) Float) -> List (Vect (S n) Float)
gravityList point points = map (gravityVector point) points
gravitySum : Vect (S n) Float -> List (Vect (S n) Float) -> Vect (S n) Float
gravitySum point points = foldl1List vectAdd (gravityList point points)
gravityMove : List (Vect (S n) Float) -> Vect (S n) Float -> Vect (S n) Float
gravityMove points point = vectAdd point (gravitySum point points)
applyFunc : (a -> a) -> List (b, a) -> List (b, a)
applyFunc func [] = []
applyFunc func (x :: xs) = (fst x, func $ snd x) :: (applyFunc func xs)
gravityIteration : List (Nat, Vect (S n) Float) -> List (Nat, Vect (S n) Float)
gravityIteration points = applyFunc (gravityMove (map snd points)) points
-- }}}
-- Retrieving Data
-- {{{
errorMagnitude : List (Vect (S n) Float) -> Vect (S n) Float -> Float
errorMagnitude points point = sqrt $ foldl1Vect (+) $ map square (gravitySum point points)
totalError : List (Vect (S n) Float) -> Float
totalError points = sum $ map (errorMagnitude points) points
cluster : List (Nat, Vect (S n) Float) -> List (Nat, Vect (S n) Float)
cluster points = if ((totalError (map snd points)) < k ) then points else cluster (gravityIteration points) where
k = 1.0 -- Change this parameter
lowError : List (Nat, Vect (S n) Float) -> (Nat, Vect (S n) Float) -> Bool
lowError points point = (errorMagnitude (map snd points) (snd point)) < k where
k = 0.3 -- Change this parameter
centers : List (Nat, Vect (S n) Float) -> List (Nat, Vect (S n) Float)
centers points = filter (lowError points) points
closestCenter : List (Nat, Vect (S n) Float) -> Vect (S n) Float -> Vect (S n) Float
closestCenter (x :: xs) point = case xs of
[] => snd x
(y :: ys) => if (distance point (snd x) > distance point (snd y)) then closestCenter (x :: ys) point else closestCenter (y :: ys) point
indexInList : Eq a => List (a, b) -> (a, b) -> Bool
indexInList [] tup = False
indexInList (x :: xs) tup = case ((fst x) == (fst tup)) of
True => True
False => indexInList xs tup
removeByIndex : Eq a => a -> List (a, b) -> List (a, b)
removeByIndex val [] = []
removeByIndex val (x :: xs) = if (fst x == val) then (removeByIndex val xs) else (x :: removeByIndex val xs)
unifyOnIndices : Eq a => List (a, b) -> List (a, List b)
unifyOnIndices [] = []
unifyOnIndices (x :: xs) = case (indexInList xs x) of
False => (fst x, [snd x]) :: unifyOnIndices xs
True => (fst x, map snd [a | a <- xs, fst a == fst x]) :: (unifyOnIndices $ removeByIndex (fst x) xs)
nonStrictZip : List a -> List b -> List (a, b)
nonStrictZip [] [] = []
nonStrictZip [] (x :: xs) = []
nonStrictZip (x :: xs) [] = []
nonStrictZip (x :: xs) (y :: ys) = (x, y) :: (nonStrictZip xs ys)
listClusters : List (Nat, Vect (S n) Float) -> List (Vect (S n) Float, List Nat)
listClusters points = unifyOnIndices $ nonStrictZip (map (closestCenter $ centers points) (map snd points)) (map fst points)
-- }}}
-- Final algorithm
-- This takes a list of points formed by (index, vector of values), normalizes the values, and lists all cluster centers and their associated points
clusterPoints : List (Nat, Vect (S n) Float) -> List (Vect (S n) Float, List Nat)
clusterPoints points = listClusters $ applyFunc normalizeVect points
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment