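-- Implementation 1: a two-layer feedforward network for the ex4data1
-- handwritten-digit data, trained with the Hager-Zhang conjugate gradient
-- method; the gradient of the cost is obtained by automatic
-- differentiation (Numeric.AD).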
{-# LANGUAGE FlexibleContexts, ScopedTypeVariables #-}
import Control.Monad
import qualified Data.Vector as V
import Data.Vector.Generic ((!))
import qualified Data.Vector.Generic as VG
import Numeric.AD
import qualified Numeric.Optimization.Algorithms.HagerZhang05.AD as CG
import System.Exit
import qualified System.Random.MWC as Rand
import qualified System.Random.MWC.Distributions as Rand
import Text.CSV
import Text.Printf
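-- theta is a single flat parameter vector: first theta1 (hiddenLayerSize
-- rows of 1+inputLayerSize weights, bias weight first), then theta2
-- (numLabel rows of 1+hiddenLayerSize weights, bias weight first).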
h :: forall a v. (RealFloat a, VG.Vector v a) => v a -> v a -> v a
h theta x = a3
  where
    theta1 i = VG.slice ((inputLayerSize+1) * i) (inputLayerSize+1) theta
    theta2 i = VG.slice (theta1Size + (hiddenLayerSize+1) * i) (hiddenLayerSize+1) theta
    a2 :: v a
    a2 = VG.generate hiddenLayerSize $ \i -> sigmoid $
           let theta1_i = theta1 i
           in theta1_i ! 0 + sum [ theta1_i!(j+1) * x!j | j <- [0..inputLayerSize-1] ]
    a3 = VG.generate numLabel $ \i -> sigmoid $
           let theta2_i = theta2 i
           in theta2_i ! 0 + sum [ theta2_i!(j+1) * a2!j | j <- [0..hiddenLayerSize-1] ]
predict :: forall a v. (RealFloat a, VG.Vector v a) => v a -> v a -> Int
predict theta x = VG.maxIndex (h theta x)
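-- Regularized cross-entropy cost:
--   J(theta) = (1/m) * sum over samples (x,y) and labels k of
--                -log (o!k  if k == y, else 1 - o!k)   where o = h theta x
--            + (lambda/(2m)) * sum of squared non-bias weights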
cost :: forall a v. (RealFloat a, VG.Vector v a) => a -> [(v a, Int)] -> v a -> a
cost lambda samples theta
  = (1 / m) * sum [ -log (if k == y then o!k else 1 - o!k)
                  | (x,y) <- samples, let o = h theta x, k <- [0 .. numLabel-1] ]
  + (lambda / (2*m)) * (sum [x**2 | i <- [0..hiddenLayerSize-1], x <- VG.toList (VG.slice 1 inputLayerSize (theta1 i))] +
                        sum [x**2 | i <- [0..numLabel-1], x <- VG.toList (VG.slice 1 hiddenLayerSize (theta2 i))])
  where
    m = fromIntegral (length samples)
    theta1 i = VG.slice ((inputLayerSize+1) * i) (inputLayerSize+1) theta
    theta2 i = VG.slice (theta1Size + (hiddenLayerSize+1) * i) (hiddenLayerSize+1) theta
sigmoid :: RealFloat a => a -> a
sigmoid x = 1 / (1 + exp (-x))
inputLayerSize, hiddenLayerSize, numLabel :: Int
inputLayerSize = 400
hiddenLayerSize = 25
numLabel = 10
thetaSize, theta1Size, theta2Size :: Int
thetaSize = theta1Size + theta2Size
theta1Size = (inputLayerSize+1) * hiddenLayerSize
theta2Size = (hiddenLayerSize+1) * numLabel
epsilonInit :: Double
epsilonInit = 0.12
{-
Data files are available from the following URLs.
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_X.csv.gz
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_y.csv
They are converted from ex4data1.mat using Octave as below.
> load('ex4data1.mat');
> csvwrite("ex4data1_X.csv", X);
> csvwrite("ex4data1_y.csv", y);
-}
loadData :: VG.Vector v Double => IO [(v Double, Int)]
loadData = do
  x <- do
    ret <- parseCSVFromFile "ex4data1_X.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [VG.fromList [read x :: Double | x <- row] | row <- rows, row /= [""]]
  y <- do
    ret <- parseCSVFromFile "ex4data1_y.csv"
    case ret of
      Left err -> print err >> exitFailure
      -- the data set encodes the digit 0 as label 10; map it back to 0
      Right rows -> return [if y' == 10 then 0 else y' | [y] <- rows, y /= "", let y' = read y]
  return $ zip x y
train :: Double -> [(V.Vector Double, Int)] -> IO (V.Vector Double)
train lambda samples = do
  let params = CG.defaultParameters
        { CG.printFinal = True
        , CG.printParams = True
        , CG.verbose = CG.VeryVerbose -- CG.Verbose
        , CG.maxItersFac = 50 / fromIntegral thetaSize
        }
      grad_tol = 0.00001
  theta0 <- randTheta
  (theta, _result1, _stat1) <- CG.optimize params grad_tol theta0 (cost (auto lambda) [(fmap auto x, y) | (x,y) <- samples])
  return theta
randTheta :: VG.Vector v Double => IO (v Double)
randTheta = do
  gen <- Rand.createSystemRandom
  VG.replicateM thetaSize (Rand.uniformR (-epsilonInit, epsilonInit) gen)
main :: IO ()
main = do
  let trainSize = 200
  samples <- loadData
  (trainData, testData) <- do
    gen <- Rand.createSystemRandom
    samples2 <- Rand.uniformShuffle (V.fromList samples) gen
    return (take trainSize (V.toList samples2), drop trainSize (V.toList samples2))
  let lambda = 1
  theta <- train lambda trainData
  printf "precision (training data): %f\n" $ sum [1::Double | (x,y) <- trainData, predict theta x == y] / fromIntegral (length trainData)
  printf "precision (test data): %f\n" $ sum [1::Double | (x,y) <- testData, predict theta x == y] / fromIntegral (length testData)
{-# LANGUAGE FlexibleContexts, ScopedTypeVariables #-}
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Generic as VG
import Numeric.LinearAlgebra.HMatrix
import qualified Numeric.Optimization.Algorithms.HagerZhang05 as CG
import System.Exit
import qualified System.Random.MWC as Rand
import qualified System.Random.MWC.Distributions as Rand
import Text.CSV
import Text.Printf
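-- Forward pass for all m samples at once: each row of x is one input,
-- fromBlocks [[1, x]] prepends a constant-1 column for the bias units,
-- and (<>) / tr are hmatrix matrix multiplication and transpose.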
h :: Vector Double -> Matrix Double -> Matrix Double
h theta x = a3
  where
    theta1 = reshape (inputLayerSize+1) (VG.slice 0 theta1Size theta)
    theta2 = reshape (hiddenLayerSize+1) (VG.slice theta1Size theta2Size theta)
    a1 = fromBlocks [[1, x]]
    z2 = a1 <> tr theta1
    a2 = fromBlocks [[1, sigmoid z2]]
    z3 = a2 <> tr theta2
    a3 = sigmoid z3
predict :: Vector Double -> Matrix Double -> Vector Int
predict theta x = VG.fromList . map VG.maxIndex . toRows $ h theta x
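-- Backpropagation: delta3 = a3 - y2 at the output layer, and
-- delta2 = (delta3 <> theta2, bias column dropped) * sigmoid' z2 at the
-- hidden layer; bias columns are excluded from regularization.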
cost :: Double -> (Matrix Double, Vector Int) -> Vector Double -> (Double, Vector Double)
cost lambda (x,y) theta = (val, thetaGrad)
  where
    m = fromIntegral (VG.length y)
    -- hiddenLayerSize >< 1+inputLayerSize
    theta1 = reshape (inputLayerSize+1) (VG.slice 0 theta1Size theta)
    -- numLabel >< 1+hiddenLayerSize
    theta2 = reshape (hiddenLayerSize+1) (VG.slice theta1Size theta2Size theta)
    a1 = fromBlocks [[1, x]]          -- m >< 1+inputLayerSize
    z2 = a1 <> tr theta1              -- m >< hiddenLayerSize
    a2 = fromBlocks [[1, sigmoid z2]] -- m >< 1+hiddenLayerSize
    z3 = a2 <> tr theta2              -- m >< numLabel
    a3 = sigmoid z3                   -- m >< numLabel
    -- one-hot encoding of the labels: m >< numLabel
    y2 = fromRows [ VG.fromList [if j == yi then 1 else 0 | j <- [0..numLabel-1]] | yi <- VG.toList y ]
    val = (1 / m) * sumElements (- y2 * log a3 - (1 - y2) * log (1 - a3))
        + (lambda / (2*m)) * (sumElements (dropColumns 1 theta1 ^ (2::Int)) +
                              sumElements (dropColumns 1 theta2 ^ (2::Int)))
    delta3 = a3 - y2                                        -- m >< numLabel
    delta2 = dropColumns 1 (delta3 <> theta2) * sigmoid' z2 -- m >< hiddenLayerSize
    -- hiddenLayerSize >< 1+inputLayerSize
    theta1Grad = scale (1/m) (tr delta2 <> a1) + scale (lambda / m) (fromBlocks [[0, dropColumns 1 theta1]])
    -- numLabel >< 1+hiddenLayerSize
    theta2Grad = scale (1/m) (tr delta3 <> a2) + scale (lambda / m) (fromBlocks [[0, dropColumns 1 theta2]])
    thetaGrad = flatten theta1Grad VG.++ flatten theta2Grad
sigmoid :: Floating a => a -> a
sigmoid x = 1 / (1 + exp (-x))
sigmoid' :: Floating a => a -> a
sigmoid' x = y * (1 - y)
  where y = sigmoid x
inputLayerSize, hiddenLayerSize, numLabel :: Int
inputLayerSize = 400
hiddenLayerSize = 25
numLabel = 10
thetaSize, theta1Size, theta2Size :: Int
thetaSize = theta1Size + theta2Size
theta1Size = (inputLayerSize+1) * hiddenLayerSize
theta2Size = (hiddenLayerSize+1) * numLabel
epsilonInit :: Double
epsilonInit = 0.12
{-
Data files are available from the following URLs.
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_X.csv.gz
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_y.csv
They are converted from ex4data1.mat using Octave as below.
> load('ex4data1.mat');
> csvwrite("ex4data1_X.csv", X);
> csvwrite("ex4data1_y.csv", y);
-}
loadData :: VG.Vector v Double => IO [(v Double, Int)]
loadData = do
  x <- do
    ret <- parseCSVFromFile "ex4data1_X.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [VG.fromList [read x :: Double | x <- row] | row <- rows, row /= [""]]
  y <- do
    ret <- parseCSVFromFile "ex4data1_y.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [if y' == 10 then 0 else y' | [y] <- rows, y /= "", let y' = read y]
  return $ zip x y
train :: Double -> [(VS.Vector Double, Int)] -> IO (VS.Vector Double)
train lambda samples = do
  let samples2 = (fromRows (map fst samples), VG.fromList (map snd samples))
  let params = CG.defaultParameters
        { CG.printFinal = True
        , CG.printParams = True
        , CG.verbose = CG.VeryVerbose -- CG.Verbose
        , CG.maxItersFac = 50 / fromIntegral thetaSize
        }
      grad_tol = 0.00001
  (theta0 :: VS.Vector Double) <- randTheta
  (theta, _result1, _stat1) <- CG.optimize params grad_tol theta0
    (CG.VFunction (fst . cost lambda samples2))
    (CG.VGradient (snd . cost lambda samples2))
    (Just (CG.VCombined (cost lambda samples2)))
  return theta
randTheta :: VG.Vector v Double => IO (v Double)
randTheta = do
  gen <- Rand.createSystemRandom
  VG.replicateM thetaSize (Rand.uniformR (-epsilonInit, epsilonInit) gen)
precision :: VS.Vector Double -> [(VS.Vector Double, Int)] -> Double
precision theta samples = VG.sum (VG.zipWith (\a b -> if a==b then 1 else 0) (predict theta x) y) / m
  where
    x = fromRows (map fst samples)
    y = VG.fromList (map snd samples)
    m = fromIntegral $ length samples
main :: IO ()
main = do
  let trainSize = 200
  samples <- loadData
  (trainData, testData) <- do
    gen <- Rand.createSystemRandom
    samples2 <- Rand.uniformShuffle (V.fromList samples) gen
    return (take trainSize (V.toList samples2), drop trainSize (V.toList samples2))
  let lambda = 1
  theta <- train lambda trainData
  printf "precision (training data): %f\n" (precision theta trainData)
  printf "precision (test data): %f\n" (precision theta testData)
{-# LANGUAGE
      DataKinds
    , ExistentialQuantification
    , FlexibleContexts
    , ScopedTypeVariables
    , TypeFamilies
    , TypeOperators
  #-}
{-# OPTIONS_GHC -fplugin GHC.TypeLits.KnownNat.Solver #-}
{-# OPTIONS_GHC -fplugin GHC.TypeLits.Presburger #-}
{-# OPTIONS_GHC -Wall #-}
import Control.Applicative
import Control.Arrow ((***))
import Control.Monad
import Data.Maybe
import Data.Proxy
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Generic as VG
import GHC.TypeLits
import qualified Numeric.LinearAlgebra.HMatrix as HM
import Numeric.LinearAlgebra.Static
import qualified Numeric.Optimization.Algorithms.HagerZhang05 as CG
import System.Exit
import qualified System.Random.MWC as Rand
import qualified System.Random.MWC.Distributions as Rand
import Text.CSV
import Text.Printf
type InputLayerSize = 400
type HiddenLayerSize = 25
type NumLabel = 10
type Params = (L HiddenLayerSize (1 + InputLayerSize), L NumLabel (1 + HiddenLayerSize))
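-- Weights are kept as a pair of statically sized matrices rather than one
-- flat vector; 'pack'/'unpack' in 'train' convert between this form and the
-- flat vector that CG.optimize operates on.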
numLabel :: Int
numLabel = fromIntegral (natVal (Proxy :: Proxy NumLabel))
params1Size, params2Size :: Int
params1Size = case size (fst (undefined :: Params)) of { (m,n) -> m*n }
params2Size = case size (snd (undefined :: Params)) of { (m,n) -> m*n }
h :: forall m. KnownNat m => Params -> L m InputLayerSize -> L m NumLabel
h (theta1,theta2) x = a3
  where
    a1 = col 1 ||| x          :: L m (1+InputLayerSize)
    z2 = a1 <> tr theta1      :: L m HiddenLayerSize
    a2 = col 1 ||| sigmoid z2 :: L m (1+HiddenLayerSize)
    z3 = a2 <> tr theta2      :: L m NumLabel
    a3 = sigmoid z3           :: L m NumLabel
predict :: forall m. KnownNat m => Params -> L m InputLayerSize -> HM.Vector Int
predict theta x = VG.fromList . map (VG.maxIndex . unwrap) . toRows $ h theta x
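-- Same cost and gradient as the plain hmatrix version, but every
-- intermediate matrix carries its dimensions in its type.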
cost
  :: forall m. KnownNat m
  => ℝ -> (L m InputLayerSize, HM.Vector Int) -> Params -> (ℝ, Params)
cost lambda (x,y) (theta1,theta2) = (val, (theta1Grad, theta2Grad))
  where
    m = fromIntegral $ fst $ size x
    a1 = col 1 ||| x          :: L m (1+InputLayerSize)
    z2 = a1 <> tr theta1      :: L m HiddenLayerSize
    a2 = col 1 ||| sigmoid z2 :: L m (1+HiddenLayerSize)
    z3 = a2 <> tr theta2      :: L m NumLabel
    a3 = sigmoid z3           :: L m NumLabel
    y2 :: L m NumLabel
    Just y2 = fromRows [ fromJust $ create $ VS.generate numLabel (\j -> if j == yi then 1 else 0)
                       | yi <- VG.toList y ]
    val = (1 / m) * sumElements (- y2 * log a3 - (1 - y2) * log (1 - a3))
        + (lambda / (2*m)) * (sumElements (dropFirstColumn theta1 ^ (2::Int)) +
                              sumElements (dropFirstColumn theta2 ^ (2::Int)))
    delta3 :: L m NumLabel
    delta3 = a3 - y2
    delta2 :: L m HiddenLayerSize
    delta2 = dropFirstColumn (delta3 <> theta2) * sigmoid' z2
    theta1Grad :: L HiddenLayerSize (1+InputLayerSize)
    theta1Grad = scale (1/m) (tr delta2 <> a1) + scale (lambda / m) (col 0 ||| dropFirstColumn theta1)
    theta2Grad :: L NumLabel (1+HiddenLayerSize)
    theta2Grad = scale (1/m) (tr delta3 <> a2) + scale (lambda / m) (col 0 ||| dropFirstColumn theta2)
fromRows :: forall m n. (KnownNat m, KnownNat n) => [R n] -> Maybe (L m n)
fromRows xss = withRows xss exactDims
dropFirstColumn :: forall m n. (KnownNat m, KnownNat n) => L m (1 + n) -> L m n
dropFirstColumn mat = snd (splitCols mat :: (L m 1, L m n))
sumElements :: forall m n. (KnownNat m, KnownNat n) => L m n -> ℝ
sumElements = HM.sumElements . unwrap
scale
  :: forall field vec mat n m. (Num field, KnownNat m, KnownNat n)
  => Domain field vec mat => field -> mat m n -> mat m n
scale a = dmmap (a*)
sigmoid :: Floating a => a -> a
sigmoid x = 1 / (1 + exp (-x))
sigmoid' :: Floating a => a -> a
sigmoid' x = y * (1 - y)
  where y = sigmoid x
epsilonInit :: ℝ
epsilonInit = 0.12
{-
Data files are available from the following URLs.
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_X.csv.gz
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_y.csv
They are converted from ex4data1.mat using Octave as below.
> load('ex4data1.mat');
> csvwrite("ex4data1_X.csv", X);
> csvwrite("ex4data1_y.csv", y);
-}
loadData :: IO ([R InputLayerSize], [Int])
loadData = do
  x <- do
    ret <- parseCSVFromFile "ex4data1_X.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows ->
        return [fromMaybe (error "invalid row size") $ create $ VG.fromList [read x | x <- row] | row <- rows, row /= [""]]
  y <- do
    ret <- parseCSVFromFile "ex4data1_y.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [if y' == 10 then 0 else y' | [y] <- rows, y /= "", let y' = read y]
  return (x, y)
train
  :: forall m. KnownNat m
  => ℝ -> (L m InputLayerSize, HM.Vector Int) -> IO Params
train lambda samples = do
  let params = CG.defaultParameters
        { CG.printFinal = True
        , CG.printParams = True
        , CG.verbose = CG.VeryVerbose -- CG.Verbose
        , CG.maxItersFac = 50 / fromIntegral (params1Size + params2Size)
        }
      grad_tol = 0.00001
  theta0 <- randTheta
  (theta, _result1, _stat1) <- CG.optimize params grad_tol (pack theta0)
    (CG.VFunction (fst . cost2))
    (CG.VGradient (snd . cost2))
    (Just (CG.VCombined cost2))
  return $ unpack theta
  where
    cost2 :: HM.Vector ℝ -> (ℝ, HM.Vector ℝ)
    cost2 = (id *** pack) . f . unpack
      where
        f :: Params -> (ℝ, Params)
        f = cost lambda samples
    pack :: Params -> HM.Vector ℝ
    pack (theta1,theta2) = HM.flatten (unwrap theta1) VG.++ HM.flatten (unwrap theta2)
    unpack :: HM.Vector ℝ -> Params
    unpack xs =
      case VG.splitAt params1Size xs of
        (ys,zs) -> (matrix (VG.toList ys), matrix (VG.toList zs))
randTheta :: IO Params
randTheta = do
  gen <- Rand.createSystemRandom
  theta1 <- matrix <$> replicateM params1Size (Rand.uniformR (-epsilonInit, epsilonInit) gen)
  theta2 <- matrix <$> replicateM params2Size (Rand.uniformR (-epsilonInit, epsilonInit) gen)
  return (theta1, theta2)
precision
  :: forall m. KnownNat m
  => Params -> (L m InputLayerSize, HM.Vector Int) -> ℝ
precision theta (x,y) = VG.sum (VG.zipWith (\a b -> if a==b then 1 else 0) (predict theta x) y) / m
  where
    m = fromIntegral $ fst (size x)
main :: IO ()
main = do
  let trainSize = 200
  (x,y) <- loadData
  gen <- Rand.createSystemRandom
  indexes <- Rand.uniformShuffle (VG.enumFromN 0 (length x)) gen
  let x' :: V.Vector (R InputLayerSize)
      x' = VG.backpermute (V.fromList x) (VG.convert indexes)
      y' :: HM.Vector Int
      y' = VG.backpermute (VS.fromList y) indexes
      (trainX, testX) = VG.splitAt trainSize x'
      (trainY, testY) = VG.splitAt trainSize y'
  let lambda = 1
  theta <- withRows (VG.toList trainX) $ \trainX' -> do
    theta <- train lambda (trainX',trainY)
    printf "precision (training data): %f\n" (precision theta (trainX',trainY))
    return theta
  withRows (VG.toList testX) $ \testX' -> do
    printf "precision (test data): %f\n" (precision theta (testX',testY))
{-# LANGUAGE FlexibleContexts, ScopedTypeVariables #-}
import Control.Monad
import Control.Monad.ST
import Data.STRef
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import Data.Vector.Generic ((!))
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Generic.Mutable as VM
import qualified Numeric.Optimization.Algorithms.HagerZhang05 as CG
import System.Exit
import qualified System.Random.MWC as Rand
import qualified System.Random.MWC.Distributions as Rand
import Text.CSV
import Text.Printf
h :: forall a v. (RealFloat a, VG.Vector v a) => v a -> v a -> v a
h theta x = a3
  where
    theta1 i = VG.slice ((inputLayerSize+1) * i) (inputLayerSize+1) theta
    theta2 i = VG.slice (theta1Size + (hiddenLayerSize+1) * i) (hiddenLayerSize+1) theta
    a2 :: v a
    a2 = VG.generate hiddenLayerSize $ \i -> sigmoid $
           let theta1_i = theta1 i
           in theta1_i ! 0 + sum [ theta1_i!(j+1) * x!j | j <- [0..inputLayerSize-1] ]
    a3 = VG.generate numLabel $ \i -> sigmoid $
           let theta2_i = theta2 i
           in theta2_i ! 0 + sum [ theta2_i!(j+1) * a2!j | j <- [0..hiddenLayerSize-1] ]
predict :: forall a v. (RealFloat a, VG.Vector v a) => v a -> v a -> Int
predict theta x = VG.maxIndex (h theta x)
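-- The cost is accumulated in an STRef and the gradient in one mutable
-- vector (theta1Grad and theta2Grad are slices of it); after the loop over
-- the samples, both are divided by m and the regularization terms are added.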
cost :: forall a v. (RealFloat a, VG.Vector v a) => a -> [(v a, Int)] -> v a -> (a, v a)
cost lambda samples theta = runST $ do
  let m = fromIntegral (length samples)
      theta1 i = VG.slice ((inputLayerSize+1) * i) (inputLayerSize+1) theta
      theta2 i = VG.slice (theta1Size + (hiddenLayerSize+1) * i) (hiddenLayerSize+1) theta
  valRef <- newSTRef 0
  thetaGrad <- VM.replicate thetaSize 0
  let theta1Grad = VM.slice 0 theta1Size thetaGrad
      theta2Grad = VM.slice theta1Size theta2Size thetaGrad
  forM_ samples $ \(x,y) -> do
    -- forward pass
    let z2 :: v a
        z2 = VG.generate hiddenLayerSize $ \i ->
               let theta1_i = theta1 i
               in theta1_i ! 0 + sum [ theta1_i!(j+1) * x!j | j <- [0..inputLayerSize-1] ]
        a2 = VG.map sigmoid z2
        z3 :: v a
        z3 = VG.generate numLabel $ \i ->
               let theta2_i = theta2 i
               in theta2_i ! 0 + sum [ theta2_i!(j+1) * a2!j | j <- [0..hiddenLayerSize-1] ]
        a3 = VG.map sigmoid z3
    forM_ [0 .. numLabel-1] $ \k ->
      modifySTRef' valRef (\x -> x - log (if k == y then a3!k else 1 - a3!k))
    -- backward pass
    let delta3 = VG.zipWith (-) a3 (VG.generate numLabel $ \i -> if i == y then 1 else 0)
        delta2 = VG.zipWith (*) (VG.generate hiddenLayerSize (\j -> sum [delta3!i * theta2 i ! (1+j) | i <- [0..numLabel-1]])) (VG.map sigmoid' z2)
    forM_ [0..numLabel-1] $ \i -> do
      modify theta2Grad (+ delta3!i) (i*(hiddenLayerSize+1))
      forM_ [0..hiddenLayerSize-1] $ \j -> do
        modify theta2Grad (+ (delta3!i * a2!j)) (i*(hiddenLayerSize+1)+1+j)
    forM_ [0..hiddenLayerSize-1] $ \i -> do
      modify theta1Grad (+ delta2!i) (i*(inputLayerSize+1))
      forM_ [0..inputLayerSize-1] $ \j -> do
        modify theta1Grad (+ (delta2!i * x!j)) (i*(inputLayerSize+1)+1+j)
  modifySTRef' valRef (/ m)
  forM_ [0..thetaSize-1] $ \i -> modify thetaGrad (/ m) i
  -- Regularization
  modifySTRef' valRef $
    (+ (lambda / (2*m)) * (sum [x**2 | i <- [0..hiddenLayerSize-1], x <- VG.toList (VG.slice 1 inputLayerSize (theta1 i))] +
                           sum [x**2 | i <- [0..numLabel-1], x <- VG.toList (VG.slice 1 hiddenLayerSize (theta2 i))]))
  forM_ [0..numLabel-1] $ \i -> do
    forM_ [0..hiddenLayerSize-1] $ \j -> do
      modify theta2Grad (+ (theta2 i ! (1+j) * lambda / m)) (i*(1+hiddenLayerSize)+1+j)
  forM_ [0..hiddenLayerSize-1] $ \i -> do
    forM_ [0..inputLayerSize-1] $ \j -> do
      modify theta1Grad (+ (theta1 i ! (1+j) * lambda / m)) (i*(1+inputLayerSize)+1+j)
  val <- readSTRef valRef
  g <- VG.unsafeFreeze thetaGrad
  return (val, g)
-- modify :: (PrimMonad m, MVector v a) => v (PrimState m) a -> (a -> a) -> Int -> m ()
modify v f i = do
  x <- VM.read v i
  VM.write v i (f x)
sigmoid :: RealFloat a => a -> a
sigmoid x = 1 / (1 + exp (-x))
sigmoid' :: RealFloat a => a -> a
sigmoid' x = y * (1 - y)
  where y = sigmoid x
inputLayerSize, hiddenLayerSize, numLabel :: Int
inputLayerSize = 400
hiddenLayerSize = 25
numLabel = 10
thetaSize, theta1Size, theta2Size :: Int
thetaSize = theta1Size + theta2Size
theta1Size = (inputLayerSize+1) * hiddenLayerSize
theta2Size = (hiddenLayerSize+1) * numLabel
epsilonInit :: Double
epsilonInit = 0.12
{-
Data files are available from the following URLs.
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_X.csv.gz
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_y.csv
They are converted from ex4data1.mat using Octave as below.
> load('ex4data1.mat');
> csvwrite("ex4data1_X.csv", X);
> csvwrite("ex4data1_y.csv", y);
-}
loadData :: VG.Vector v Double => IO [(v Double, Int)]
loadData = do
  x <- do
    ret <- parseCSVFromFile "ex4data1_X.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [VG.fromList [read x :: Double | x <- row] | row <- rows, row /= [""]]
  y <- do
    ret <- parseCSVFromFile "ex4data1_y.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [if y' == 10 then 0 else y' | [y] <- rows, y /= "", let y' = read y]
  return $ zip x y
train :: Double -> [(VS.Vector Double, Int)] -> IO (VS.Vector Double)
train lambda samples = do
  let params = CG.defaultParameters
        { CG.printFinal = True
        , CG.printParams = True
        , CG.verbose = CG.VeryVerbose -- CG.Verbose
        , CG.maxItersFac = 50 / fromIntegral thetaSize
        }
      grad_tol = 0.00001
  (theta0 :: VS.Vector Double) <- randTheta
  (theta, _result1, _stat1) <- CG.optimize params grad_tol theta0
    (CG.VFunction (fst . cost lambda samples))
    (CG.VGradient (snd . cost lambda samples))
    (Just (CG.VCombined (cost lambda samples)))
  return theta
randTheta :: VG.Vector v Double => IO (v Double)
randTheta = do
  gen <- Rand.createSystemRandom
  VG.replicateM thetaSize (Rand.uniformR (-epsilonInit, epsilonInit) gen)
main :: IO ()
main = do
  let trainSize = 200
  samples <- loadData
  (trainData, testData) <- do
    gen <- Rand.createSystemRandom
    samples2 <- Rand.uniformShuffle (V.fromList samples) gen
    return (take trainSize (V.toList samples2), drop trainSize (V.toList samples2))
  let lambda = 1
  theta <- train lambda trainData
  printf "precision (training data): %f\n" $ sum [1::Double | (x,y) <- trainData, predict theta x == y] / fromIntegral (length trainData)
  printf "precision (test data): %f\n" $ sum [1::Double | (x,y) <- testData, predict theta x == y] / fromIntegral (length testData)