Created
February 23, 2014 13:27
-
-
Save tatyusa/9171479 to your computer and use it in GitHub Desktop.
k-means method in Haskell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Char (isSpace)
import Data.List
import System.IO
import System.Random
-- | Label of a cluster.
type Cluster = Int

-- | Point in R^2, stored as a pair of coordinates.
type Point = (Double, Double)

-- | Classified point: a point together with the cluster it is labelled with.
type CPoint = (Cluster, Point)
-- | Euclidean distance between two points.
dist :: Point -> Point -> Double
dist (ax, ay) (bx, by) = sqrt (dx * dx + dy * dy)
  where
    dx = ax - bx
    dy = ay - by
-- | Center of a list of points: the mean of the first coordinates paired
-- with the mean of the second coordinates.
--
-- Each mean is @sum / length@, so an empty list gives @0 / 0@ in both
-- coordinates.
center :: [Point] -> Point
center points = (mean firsts, mean seconds)
  where
    (firsts, seconds) = unzip points
    mean values = sum values / fromIntegral (length values)
-- | Whether two classified points carry the same cluster label.
-- The coordinates are not inspected.
colleague :: CPoint -> CPoint -> Bool
colleague left right = fst left == fst right
-- | One step of the k-means method.
--
-- The current labels are used to compute one prototype (the 'center' of its
-- members) per cluster that has at least one member. Every point is then
-- relabelled with the cluster whose prototype is nearest to it according to
-- 'dist'. The order of the points is preserved.
kmeans :: [CPoint] -> [CPoint]
kmeans xs = map (\(_, p) -> (classify p, p)) xs
  where
    -- Classify a point: label of the nearest prototype. The pairs are
    -- (distance, cluster), so 'minimum' selects the smallest distance and
    -- breaks ties by the smaller cluster label. 'classify' is only called
    -- for elements of xs, in which case 'prototypes' is non-empty.
    classify :: Point -> Cluster
    classify x = snd . minimum $ map (\(c, p) -> (dist x p, c)) prototypes

    -- Prototypes of the current labelling. Bound once here so that they are
    -- shared by every call of 'classify' instead of being rebuilt per point.
    -- Sorting brings equal labels next to each other before grouping; the
    -- pattern in the generator takes the label from the first member.
    prototypes :: [CPoint]
    prototypes =
      [ (c, center (map snd members))
      | members@((c, _) : _) <- groupBy colleague (sort xs)
      ]
-- | Cluster the points stored in @data.csv@ with the k-means method.
--
-- Every line of @data.csv@ that is not blank is parsed as a point written as
-- @x, y@. Each point receives a random initial cluster in 0..2, 'kmeans' is
-- applied 100 times, and the points labelled @n@ afterwards are written to
-- @result<n>.csv@ (n = 0, 1, 2), one @x, y@ line per point. A cluster that
-- ended up without points produces an empty file.
main :: IO ()
main = do
  gen <- getStdGen
  content <- readFile "data.csv"
  let points :: [Point]
      points = map makePoint . filter (not . all isSpace) $ lines content
      -- Initial value: pair each point with a random cluster label in 0..2.
      iv :: [CPoint]
      iv = zip (randomRs (0, 2) gen :: [Int]) points
      -- Labelling after 100 steps. 'iterate' yields an infinite list, so the
      -- index always exists. Sorting orders the points by label and then by
      -- coordinates, which fixes the line order inside each output file.
      final :: [CPoint]
      final = sort (iterate kmeans iv !! 100)
  mapM_ (output final) [0 .. 2]
  where
    -- "Double, Double" -> (Double, Double). The line is cut into runs of
    -- non-comma characters and single commas; the first and last piece are
    -- read as the coordinates. Blank lines are filtered out by the caller,
    -- so the list of pieces is never empty here.
    makePoint :: String -> Point
    makePoint line =
      let pieces = groupBy (\a b -> a /= ',' && b /= ',') line
       in (read (head pieces), read (last pieces))

    -- Write the points labelled n to "result<n>.csv". Selecting by label,
    -- rather than taking the n-th group, keeps file n tied to cluster n even
    -- when some cluster is empty. 'withFile' closes the handle on every exit
    -- path.
    output :: [CPoint] -> Int -> IO ()
    output labelled n =
      withFile ("result" ++ show n ++ ".csv") WriteMode $ \out ->
        mapM_ (hPutStrLn out . form) (filter ((== n) . fst) labelled)

    -- Render the coordinates of a classified point as "x, y".
    form :: CPoint -> String
    form (_, (x, y)) = show x ++ ", " ++ show y
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment