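Four Haskell implementations of the feed-forward neural network (one hidden layer, 400-25-10) from the handwritten-digit exercise whose data files (ex4data1.mat) appear below: one using automatic differentiation, one using hmatrix with a hand-derived gradient, one using statically typed hmatrix, and one using plain loops over mutable vectors. All variants minimize the same regularized cross-entropy cost with the Hager-Zhang CG_DESCENT algorithm:

$$J(\Theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\left[-y^{(i)}_k \log\!\left(h_\Theta(x^{(i)})_k\right) - \left(1-y^{(i)}_k\right)\log\!\left(1-h_\Theta(x^{(i)})_k\right)\right] + \frac{\lambda}{2m}\left(\sum_{j,l}\bigl(\Theta^{(1)}_{j,l}\bigr)^2 + \sum_{j,l}\bigl(\Theta^{(2)}_{j,l}\bigr)^2\right)$$

where $h_\Theta$ is the sigmoid network output, $K = 10$ is the number of labels, and the bias columns of $\Theta^{(1)}$ and $\Theta^{(2)}$ are excluded from the regularization sums.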
{-# LANGUAGE FlexibleContexts, ScopedTypeVariables #-}
import Control.Monad
import qualified Data.Vector as V
import Data.Vector.Generic ((!))
import qualified Data.Vector.Generic as VG
import Numeric.AD
import qualified Numeric.Optimization.Algorithms.HagerZhang05.AD as CG
import System.Exit
import qualified System.Random.MWC as Rand
import qualified System.Random.MWC.Distributions as Rand
import Text.CSV
import Text.Printf
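-- Variant 1: only the cost function is written by hand; CG_DESCENT obtains
-- its gradient by automatic differentiation (Numeric.AD) through the
-- Numeric.Optimization.Algorithms.HagerZhang05.AD wrapper.
--
-- Parameter layout: theta is one flat vector, theta1 first (hiddenLayerSize
-- rows of 1+inputLayerSize entries, bias first), then theta2 (numLabel rows
-- of 1+hiddenLayerSize entries, bias first).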
h :: forall a v. (RealFloat a, VG.Vector v a) => v a -> v a -> v a
h theta x = a3
  where
    theta1 i = VG.slice ((inputLayerSize+1) * i) (inputLayerSize+1) theta
    theta2 i = VG.slice (theta1Size + (hiddenLayerSize+1) * i) (hiddenLayerSize+1) theta
    a2 :: v a
    a2 = VG.generate hiddenLayerSize $ \i -> sigmoid $
           let theta1_i = theta1 i
           in theta1_i ! 0 + sum [ theta1_i!(j+1) * x!j | j <- [0..inputLayerSize-1] ]
    a3 = VG.generate numLabel $ \i -> sigmoid $
           let theta2_i = theta2 i
           in theta2_i ! 0 + sum [ theta2_i!(j+1) * a2!j | j <- [0..hiddenLayerSize-1] ]

predict :: forall a v. (RealFloat a, VG.Vector v a) => v a -> v a -> Int
predict theta x = VG.maxIndex (h theta x)

cost :: forall a v. (RealFloat a, VG.Vector v a) => a -> [(v a, Int)] -> v a -> a
cost lambda samples theta
  = (1 / m) * sum [-log (if k == y then o!k else 1 - o!k) | (x,y) <- samples, let o = h theta x, k <- [0 .. numLabel-1]]
  + (lambda / (2*m)) * (sum [x**2 | i <- [0..hiddenLayerSize-1], x <- VG.toList (VG.slice 1 inputLayerSize (theta1 i))] +
                        sum [x**2 | i <- [0..numLabel-1], x <- VG.toList (VG.slice 1 hiddenLayerSize (theta2 i))])
  where
    m = fromIntegral (length samples)
    theta1 i = VG.slice ((inputLayerSize+1) * i) (inputLayerSize+1) theta
    theta2 i = VG.slice (theta1Size + (hiddenLayerSize+1) * i) (hiddenLayerSize+1) theta

sigmoid :: RealFloat a => a -> a
sigmoid x = 1 / (1 + exp (-x))

inputLayerSize = 400
hiddenLayerSize = 25
numLabel = 10

thetaSize = theta1Size + theta2Size
theta1Size = (inputLayerSize+1) * hiddenLayerSize
theta2Size = (hiddenLayerSize+1) * numLabel

epsilonInit = 0.12

{-
Data files are available from the following URLs.
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_X.csv.gz
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_y.csv
They are converted from ex4data1.mat using Octave as below.

> load('ex4data1.mat');
> csvwrite("ex4data1_X.csv", X);
> csvwrite("ex4data1_y.csv", y);
-}
loadData :: VG.Vector v Double => IO [(v Double, Int)]
loadData = do
  x <- do
    ret <- parseCSVFromFile "ex4data1_X.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [VG.fromList [read x :: Double | x <- row] | row <- rows, row /= [""]]
  y <- do
    ret <- parseCSVFromFile "ex4data1_y.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [if y' == 10 then 0 else y' | [y] <- rows, y /= "", let y' = read y]
  return $ zip x y

train :: Double -> [(V.Vector Double, Int)] -> IO (V.Vector Double)
train lambda samples = do
  let params = CG.defaultParameters
        { CG.printFinal = True
        , CG.printParams = True
        , CG.verbose = CG.VeryVerbose -- CG.Verbose
        , CG.maxItersFac = 50 / fromIntegral thetaSize
        }
      grad_tol = 0.00001
  theta0 <- randTheta
  (theta, _result1, _stat1) <- CG.optimize params grad_tol theta0 (cost (auto lambda) [(fmap auto x, y) | (x,y) <- samples])
  return theta

randTheta :: VG.Vector v Double => IO (v Double)
randTheta = do
  gen <- Rand.createSystemRandom
  VG.replicateM thetaSize (Rand.uniformR (-epsilonInit, epsilonInit) gen)

main :: IO ()
main = do
  let trainSize = 200
  samples <- loadData
  (trainData, testData) <- do
    gen <- Rand.createSystemRandom
    samples2 <- Rand.uniformShuffle (V.fromList samples) gen
    return (take trainSize (V.toList samples2), drop trainSize (V.toList samples2))
  let lambda = 1
  theta <- train lambda trainData
  printf "precision (training data): %f\n" $ sum [1::Double | (x,y) <- trainData, predict theta x == y] / fromIntegral (length trainData)
  printf "precision (test data): %f\n" $ sum [1::Double | (x,y) <- testData, predict theta x == y] / fromIntegral (length testData)
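The second variant computes the gradient itself: the forward pass is vectorized over the whole sample matrix with hmatrix, and backpropagation yields the analytic gradient that is handed to CG_DESCENT alongside the cost.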
{-# LANGUAGE FlexibleContexts, ScopedTypeVariables #-}
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Generic as VG
import Numeric.LinearAlgebra.HMatrix
import qualified Numeric.Optimization.Algorithms.HagerZhang05 as CG
import System.Exit
import qualified System.Random.MWC as Rand
import qualified System.Random.MWC.Distributions as Rand
import Text.CSV
import Text.Printf

h :: Vector Double -> Matrix Double -> Matrix Double
h theta x = a3
  where
    theta1 = reshape (inputLayerSize+1) (VG.slice 0 theta1Size theta)
    theta2 = reshape (hiddenLayerSize+1) (VG.slice theta1Size theta2Size theta)
    a1 = fromBlocks [[1, x]]
    z2 = a1 <> tr theta1
    a2 = fromBlocks [[1, sigmoid z2]]
    z3 = a2 <> tr theta2
    a3 = sigmoid z3

predict :: Vector Double -> Matrix Double -> Vector Int
predict theta x = VG.fromList . map VG.maxIndex . toRows $ h theta x

cost :: Double -> (Matrix Double, Vector Int) -> Vector Double -> (Double, Vector Double)
cost lambda (x,y) theta = (val, thetaGrad)
  where
    m = fromIntegral (VG.length y)
    -- hiddenLayerSize >< 1+inputLayerSize
    theta1 = reshape (inputLayerSize+1) (VG.slice 0 theta1Size theta)
    -- numLabel >< 1+hiddenLayerSize
    theta2 = reshape (hiddenLayerSize+1) (VG.slice theta1Size theta2Size theta)
    a1 = fromBlocks [[1, x]]           -- m >< 1+inputLayerSize
    z2 = a1 <> tr theta1               -- m >< hiddenLayerSize
    a2 = fromBlocks [[1, sigmoid z2]]  -- m >< 1+hiddenLayerSize
    z3 = a2 <> tr theta2               -- m >< numLabel
    a3 = sigmoid z3                    -- m >< numLabel
    -- m >< numLabel
    y2 = fromRows [ VG.fromList [if j == yi then 1 else 0 | j <- [0..numLabel-1]] | yi <- VG.toList y ]
    val = (1 / m) * sumElements (- y2 * log(a3) - (1 - y2) * log(1 - a3))
        + (lambda / (2*m)) * (sumElements (dropColumns 1 theta1 ^ (2::Int)) +
                              sumElements (dropColumns 1 theta2 ^ (2::Int)))
    delta3 = a3 - y2                                         -- m >< numLabel
    delta2 = dropColumns 1 (delta3 <> theta2) * sigmoid' z2  -- m >< hiddenLayerSize
    -- hiddenLayerSize >< 1+inputLayerSize
    theta1Grad = scale (1/m) (tr delta2 <> a1) + scale (lambda / m) (fromBlocks [[0, dropColumns 1 theta1]])
    -- numLabel >< 1+hiddenLayerSize
    theta2Grad = scale (1/m) (tr delta3 <> a2) + scale (lambda / m) (fromBlocks [[0, dropColumns 1 theta2]])
    thetaGrad = flatten theta1Grad VG.++ flatten theta2Grad

sigmoid :: Floating a => a -> a
sigmoid x = 1 / (1 + exp (-x))

sigmoid' :: Floating a => a -> a
sigmoid' x = y * (1 - y)
  where y = sigmoid x
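-- A hypothetical helper, not part of the original program: a minimal
-- numerical gradient check comparing the backpropagation gradient from
-- 'cost' against central differences on the first few coordinates.
-- Each returned pair (analytic, numeric) should agree to several digits.
checkGrad :: Double -> (Matrix Double, Vector Int) -> Vector Double -> [(Double, Double)]
checkGrad lambda samples theta =
  [ (g VG.! i, (f (bump i eps) - f (bump i (-eps))) / (2*eps)) | i <- [0..9] ]
  where
    eps = 1e-4
    f = fst . cost lambda samples
    g = snd (cost lambda samples theta)
    bump i d = theta VG.// [(i, (theta VG.! i) + d)]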
inputLayerSize = 400
hiddenLayerSize = 25
numLabel = 10

thetaSize = theta1Size + theta2Size
theta1Size = (inputLayerSize+1) * hiddenLayerSize
theta2Size = (hiddenLayerSize+1) * numLabel

epsilonInit = 0.12

{-
Data files are available from the following URLs.
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_X.csv.gz
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_y.csv
They are converted from ex4data1.mat using Octave as below.

> load('ex4data1.mat');
> csvwrite("ex4data1_X.csv", X);
> csvwrite("ex4data1_y.csv", y);
-}
loadData :: VG.Vector v Double => IO [(v Double, Int)]
loadData = do
  x <- do
    ret <- parseCSVFromFile "ex4data1_X.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [VG.fromList [read x :: Double | x <- row] | row <- rows, row /= [""]]
  y <- do
    ret <- parseCSVFromFile "ex4data1_y.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [if y' == 10 then 0 else y' | [y] <- rows, y /= "", let y' = read y]
  return $ zip x y

train :: Double -> [(VS.Vector Double, Int)] -> IO (VS.Vector Double)
train lambda samples = do
  let samples2 = (fromRows (map fst samples), VG.fromList (map snd samples))
  let params = CG.defaultParameters
        { CG.printFinal = True
        , CG.printParams = True
        , CG.verbose = CG.VeryVerbose -- CG.Verbose
        , CG.maxItersFac = 50 / fromIntegral thetaSize
        }
      grad_tol = 0.00001
  (theta0 :: VS.Vector Double) <- randTheta
  (theta, _result1, _stat1) <- CG.optimize params grad_tol theta0
    (CG.VFunction (fst . cost lambda samples2))
    (CG.VGradient (snd . cost lambda samples2))
    (Just (CG.VCombined (cost lambda samples2)))
  return theta

randTheta :: VG.Vector v Double => IO (v Double)
randTheta = do
  gen <- Rand.createSystemRandom
  VG.replicateM thetaSize (Rand.uniformR (-epsilonInit, epsilonInit) gen)

precision :: VS.Vector Double -> [(VS.Vector Double, Int)] -> Double
precision theta samples = VG.sum (VG.zipWith (\a b -> if a==b then 1 else 0) (predict theta x) y) / m
  where
    x = fromRows (map fst samples)
    y = VG.fromList (map snd samples)
    m = fromIntegral $ length samples

main :: IO ()
main = do
  let trainSize = 200
  samples <- loadData
  (trainData, testData) <- do
    gen <- Rand.createSystemRandom
    samples2 <- Rand.uniformShuffle (V.fromList samples) gen
    return (take trainSize (V.toList samples2), drop trainSize (V.toList samples2))
  let lambda = 1
  theta <- train lambda trainData
  printf "precision (training data): %f\n" (precision theta trainData)
  printf "precision (test data): %f\n" (precision theta testData)
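The third variant is the hmatrix implementation again, but with dimensions tracked in the types via Numeric.LinearAlgebra.Static: the layer sizes are type-level naturals, so shape mismatches in the backpropagation algebra become compile-time errors. The two GHC.TypeLits plugins discharge the arising type-level arithmetic constraints.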
{-# LANGUAGE
    DataKinds
  , ExistentialQuantification
  , FlexibleContexts
  , ScopedTypeVariables
  , TypeFamilies
  , TypeOperators
  #-}
{-# OPTIONS_GHC -fplugin GHC.TypeLits.KnownNat.Solver #-}
{-# OPTIONS_GHC -fplugin GHC.TypeLits.Presburger #-}
{-# OPTIONS_GHC -Wall #-}
import Control.Applicative
import Control.Arrow ((***))
import Control.Monad
import Data.Maybe
import Data.Proxy
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Generic as VG
import GHC.TypeLits
import qualified Numeric.LinearAlgebra.HMatrix as HM
import Numeric.LinearAlgebra.Static
import qualified Numeric.Optimization.Algorithms.HagerZhang05 as CG
import System.Exit
import qualified System.Random.MWC as Rand
import qualified System.Random.MWC.Distributions as Rand
import Text.CSV
import Text.Printf

type InputLayerSize = 400
type HiddenLayerSize = 25
type NumLabel = 10
type Params = (L HiddenLayerSize (1 + InputLayerSize), L NumLabel (1 + HiddenLayerSize))

numLabel :: Int
numLabel = fromIntegral (natVal (Proxy :: Proxy NumLabel))

params1Size, params2Size :: Int
params1Size = case size (fst (undefined :: Params)) of { (m,n) -> m*n }
params2Size = case size (snd (undefined :: Params)) of { (m,n) -> m*n }

h :: forall m. KnownNat m => Params -> L m InputLayerSize -> L m NumLabel
h (theta1,theta2) x = a3
  where
    a1 = col 1 ||| x          :: L m (1+InputLayerSize)
    z2 = a1 <> tr theta1      :: L m HiddenLayerSize
    a2 = col 1 ||| sigmoid z2 :: L m (1+HiddenLayerSize)
    z3 = a2 <> tr theta2      :: L m NumLabel
    a3 = sigmoid z3           :: L m NumLabel

predict :: forall m. KnownNat m => Params -> L m InputLayerSize -> HM.Vector Int
predict theta x = VG.fromList . map (VG.maxIndex . unwrap) . toRows $ h theta x

cost
  :: forall m. KnownNat m
  => ℝ -> (L m InputLayerSize, HM.Vector Int) -> Params -> (ℝ, Params)
cost lambda (x,y) (theta1,theta2) = (val, (theta1Grad, theta2Grad))
  where
    m = fromIntegral $ fst $ size x
    a1 = col 1 ||| x          :: L m (1+InputLayerSize)
    z2 = a1 <> tr theta1      :: L m HiddenLayerSize
    a2 = col 1 ||| sigmoid z2 :: L m (1+HiddenLayerSize)
    z3 = a2 <> tr theta2      :: L m NumLabel
    a3 = sigmoid z3           :: L m NumLabel
    y2 :: L m NumLabel
    Just y2 = fromRows [ fromJust $ create $ VS.generate numLabel (\j -> if j == yi then 1 else 0)
                       | yi <- VG.toList y ]
    val = (1 / m) * sumElements (- y2 * log(a3) - (1 - y2) * log(1 - a3))
        + (lambda / (2*m)) * (sumElements (dropFirstColumn theta1 ^ (2::Int)) +
                              sumElements (dropFirstColumn theta2 ^ (2::Int)))
    delta3 :: L m NumLabel
    delta3 = a3 - y2
    delta2 :: L m HiddenLayerSize
    delta2 = dropFirstColumn (delta3 <> theta2) * sigmoid' z2
    theta1Grad :: L HiddenLayerSize (1+InputLayerSize)
    theta1Grad = scale (1/m) (tr delta2 <> a1) + scale (lambda / m) (col 0 ||| dropFirstColumn theta1)
    theta2Grad :: L NumLabel (1+HiddenLayerSize)
    theta2Grad = scale (1/m) (tr delta3 <> a2) + scale (lambda / m) (col 0 ||| dropFirstColumn theta2)
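-- Helpers filling a few gaps in the Numeric.LinearAlgebra.Static API:
-- statically sized fromRows, dropping a bias column, summing all elements,
-- and scalar multiplication.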
fromRows :: forall m n. (KnownNat m, KnownNat n) => [R n] -> Maybe (L m n)
fromRows xss = withRows xss exactDims

dropFirstColumn :: forall m n. (KnownNat m, KnownNat n) => L m (1 + n) -> L m n
dropFirstColumn mat = snd (splitCols mat :: (L m 1, L m n))

sumElements :: forall m n. (KnownNat m, KnownNat n) => L m n -> ℝ
sumElements = HM.sumElements . unwrap

scale
  :: forall field vec mat n m. (Num field, KnownNat m, KnownNat n)
  => Domain field vec mat => field -> mat m n -> mat m n
scale a = dmmap (a*)

sigmoid :: Floating a => a -> a
sigmoid x = 1 / (1 + exp (-x))

sigmoid' :: Floating a => a -> a
sigmoid' x = y * (1 - y)
  where y = sigmoid x

epsilonInit :: ℝ
epsilonInit = 0.12

{-
Data files are available from the following URLs.
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_X.csv.gz
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_y.csv
They are converted from ex4data1.mat using Octave as below.

> load('ex4data1.mat');
> csvwrite("ex4data1_X.csv", X);
> csvwrite("ex4data1_y.csv", y);
-}
loadData :: IO ([R InputLayerSize], [Int])
loadData = do
  x <- do
    ret <- parseCSVFromFile "ex4data1_X.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows ->
        return [fromMaybe (error "invalid row size") $ create $ VG.fromList [read x | x <- row] | row <- rows, row /= [""]]
  y <- do
    ret <- parseCSVFromFile "ex4data1_y.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return $ [if y' == 10 then 0 else y' | [y] <- rows, y /= "", let y' = read y]
  return (x, y)
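-- CG_DESCENT optimizes over a flat vector, so 'train' converts between the
-- typed (theta1, theta2) pair and an untyped HM.Vector with pack/unpack
-- around every cost evaluation.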
train
  :: forall m. KnownNat m
  => ℝ -> (L m InputLayerSize, HM.Vector Int) -> IO Params
train lambda samples = do
  let params = CG.defaultParameters
        { CG.printFinal = True
        , CG.printParams = True
        , CG.verbose = CG.VeryVerbose -- CG.Verbose
        , CG.maxItersFac = 50 / fromIntegral (params1Size + params2Size)
        }
      grad_tol = 0.00001
  theta0 <- randTheta
  (theta, _result1, _stat1) <- CG.optimize params grad_tol (pack theta0)
    (CG.VFunction (fst . cost2))
    (CG.VGradient (snd . cost2))
    (Just (CG.VCombined cost2))
  return $ unpack theta
  where
    cost2 :: HM.Vector ℝ -> (ℝ, HM.Vector ℝ)
    cost2 = (id *** pack) . f . unpack
      where
        f :: Params -> (ℝ, Params)
        f = cost lambda samples
    pack :: Params -> HM.Vector ℝ
    pack (theta1,theta2) = HM.flatten (unwrap theta1) VG.++ HM.flatten (unwrap theta2)
    unpack :: HM.Vector ℝ -> Params
    unpack xs =
      case VG.splitAt params1Size xs of
        (ys,zs) -> (matrix (VG.toList ys), matrix (VG.toList zs))

randTheta :: IO Params
randTheta = do
  gen <- Rand.createSystemRandom
  theta1 <- matrix <$> replicateM params1Size (Rand.uniformR (-epsilonInit, epsilonInit) gen)
  theta2 <- matrix <$> replicateM params2Size (Rand.uniformR (-epsilonInit, epsilonInit) gen)
  return (theta1, theta2)

precision
  :: forall m. KnownNat m
  => Params -> (L m InputLayerSize, HM.Vector Int) -> ℝ
precision theta (x,y) = VG.sum (VG.zipWith (\a b -> if a==b then 1 else 0) (predict theta x) y) / m
  where
    m = fromIntegral $ fst (size x)

main :: IO ()
main = do
  let trainSize = 200
  (x,y) <- loadData
  gen <- Rand.createSystemRandom
  indexes <- Rand.uniformShuffle (VG.enumFromN 0 (length x)) gen
  let x' :: V.Vector (R InputLayerSize)
      x' = VG.backpermute (V.fromList x) (VG.convert indexes)
      y' :: HM.Vector Int
      y' = VG.backpermute (VS.fromList y) indexes
      (trainX, testX) = VG.splitAt trainSize x'
      (trainY, testY) = VG.splitAt trainSize y'
  let lambda = 1
  theta <- withRows (VG.toList trainX) $ \trainX' -> do
    theta <- train lambda (trainX',trainY)
    printf "precision (training data): %f\n" (precision theta (trainX',trainY))
    return theta
  withRows (VG.toList testX) $ \testX' -> do
    printf "precision (test data): %f\n" (precision theta (testX',testY))
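The last variant uses the same hand-derived gradient as the hmatrix version, but without a matrix library: it loops over the samples one by one and accumulates the cost and the gradient into a mutable vector inside the ST monad.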
{-# LANGUAGE FlexibleContexts, ScopedTypeVariables #-}
import Control.Monad
import Control.Monad.ST
import Data.STRef
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import Data.Vector.Generic ((!))
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Generic.Mutable as VM
import qualified Numeric.Optimization.Algorithms.HagerZhang05 as CG
import System.Exit
import qualified System.Random.MWC as Rand
import qualified System.Random.MWC.Distributions as Rand
import Text.CSV
import Text.Printf

h :: forall a v. (RealFloat a, VG.Vector v a) => v a -> v a -> v a
h theta x = a3
  where
    theta1 i = VG.slice ((inputLayerSize+1) * i) (inputLayerSize+1) theta
    theta2 i = VG.slice (theta1Size + (hiddenLayerSize+1) * i) (hiddenLayerSize+1) theta
    a2 :: v a
    a2 = VG.generate hiddenLayerSize $ \i -> sigmoid $
           let theta1_i = theta1 i
           in theta1_i ! 0 + sum [ theta1_i!(j+1) * x!j | j <- [0..inputLayerSize-1] ]
    a3 = VG.generate numLabel $ \i -> sigmoid $
           let theta2_i = theta2 i
           in theta2_i ! 0 + sum [ theta2_i!(j+1) * a2!j | j <- [0..hiddenLayerSize-1] ]

predict :: forall a v. (RealFloat a, VG.Vector v a) => v a -> v a -> Int
predict theta x = VG.maxIndex (h theta x)
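-- Computes the cost together with its gradient in a single pass over the
-- samples: for each sample the forward pass produces a2/a3, the deltas are
-- backpropagated, and the contributions are accumulated into a mutable
-- gradient vector whose layout mirrors the flat theta vector. The
-- regularization terms are added once at the end.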
cost :: forall a v. (RealFloat a, VG.Vector v a) => a -> [(v a, Int)] -> v a -> (a, v a)
cost lambda samples theta = runST $ do
  let m = fromIntegral (length samples)
      theta1 i = VG.slice ((inputLayerSize+1) * i) (inputLayerSize+1) theta
      theta2 i = VG.slice (theta1Size + (hiddenLayerSize+1) * i) (hiddenLayerSize+1) theta
  valRef <- newSTRef 0
  thetaGrad <- VM.replicate thetaSize 0
  let theta1Grad = VM.slice 0 theta1Size thetaGrad
      theta2Grad = VM.slice theta1Size theta2Size thetaGrad
  forM_ samples $ \(x,y) -> do
    let z2 :: v a
        z2 = VG.generate hiddenLayerSize $ \i ->
               let theta1_i = theta1 i
               in theta1_i ! 0 + sum [ theta1_i!(j+1) * x!j | j <- [0..inputLayerSize-1] ]
        a2 = VG.map sigmoid z2
        z3 :: v a
        z3 = VG.generate numLabel $ \i ->
               let theta2_i = theta2 i
               in theta2_i ! 0 + sum [ theta2_i!(j+1) * a2!j | j <- [0..hiddenLayerSize-1] ]
        a3 = VG.map sigmoid z3
    forM_ [0 .. numLabel-1] $ \k ->
      modifySTRef' valRef (\x -> x - log (if k == y then a3!k else 1 - a3!k))
    let delta3 = VG.zipWith (-) a3 (VG.generate numLabel $ \i -> if i == y then 1 else 0)
        delta2 = VG.zipWith (*) (VG.generate hiddenLayerSize (\j -> sum [delta3!i * theta2 i ! (1+j) | i <- [0..numLabel-1]])) (VG.map sigmoid' z2)
    forM_ [0..numLabel-1] $ \i -> do
      modify theta2Grad (+ delta3!i) (i*(hiddenLayerSize+1))
      forM_ [0..hiddenLayerSize-1] $ \j -> do
        modify theta2Grad (+ (delta3!i * a2!j)) (i*(hiddenLayerSize+1)+1+j)
    forM_ [0..hiddenLayerSize-1] $ \i -> do
      modify theta1Grad (+ delta2!i) (i*(inputLayerSize+1))
      forM_ [0..inputLayerSize-1] $ \j -> do
        modify theta1Grad (+ (delta2!i * x!j)) (i*(inputLayerSize+1)+1+j)
  modifySTRef' valRef (/ m)
  forM_ [0..thetaSize-1] $ \i -> modify thetaGrad (/ m) i
  -- Regularization
  modifySTRef' valRef $
    (+ (lambda / (2*m)) * (sum [x**2 | i <- [0..hiddenLayerSize-1], x <- VG.toList (VG.slice 1 inputLayerSize (theta1 i))] +
                           sum [x**2 | i <- [0..numLabel-1], x <- VG.toList (VG.slice 1 hiddenLayerSize (theta2 i))]))
  forM_ [0..numLabel-1] $ \i -> do
    forM_ [0..hiddenLayerSize-1] $ \j -> do
      modify theta2Grad (+ (theta2 i ! (1+j) * lambda / m)) (i*(1+hiddenLayerSize)+1+j)
  forM_ [0..hiddenLayerSize-1] $ \i -> do
    forM_ [0..inputLayerSize-1] $ \j -> do
      modify theta1Grad (+ (theta1 i ! (1+j) * lambda / m)) (i*(1+inputLayerSize)+1+j)
  val <- readSTRef valRef
  g <- VG.unsafeFreeze thetaGrad
  return (val, g)

-- modify :: (PrimMonad m, MVector v a) => v (PrimState m) a -> (a -> a) -> Int -> m ()
modify v f i = do
  x <- VM.read v i
  VM.write v i (f x)

sigmoid :: RealFloat a => a -> a
sigmoid x = 1 / (1 + exp (-x))

sigmoid' :: RealFloat a => a -> a
sigmoid' x = y * (1 - y)
  where y = sigmoid x

inputLayerSize = 400
hiddenLayerSize = 25
numLabel = 10

thetaSize = theta1Size + theta2Size
theta1Size = (inputLayerSize+1) * hiddenLayerSize
theta2Size = (hiddenLayerSize+1) * numLabel

epsilonInit = 0.12

{-
Data files are available from the following URLs.
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_X.csv.gz
* https://dl.dropboxusercontent.com/u/123796/gist/b553004a7ca76310e11c/data/ex4data1_y.csv
They are converted from ex4data1.mat using Octave as below.

> load('ex4data1.mat');
> csvwrite("ex4data1_X.csv", X);
> csvwrite("ex4data1_y.csv", y);
-}
loadData :: VG.Vector v Double => IO [(v Double, Int)]
loadData = do
  x <- do
    ret <- parseCSVFromFile "ex4data1_X.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [VG.fromList [read x :: Double | x <- row] | row <- rows, row /= [""]]
  y <- do
    ret <- parseCSVFromFile "ex4data1_y.csv"
    case ret of
      Left err -> print err >> exitFailure
      Right rows -> return [if y' == 10 then 0 else y' | [y] <- rows, y /= "", let y' = read y]
  return $ zip x y

train :: Double -> [(VS.Vector Double, Int)] -> IO (VS.Vector Double)
train lambda samples = do
  let params = CG.defaultParameters
        { CG.printFinal = True
        , CG.printParams = True
        , CG.verbose = CG.VeryVerbose -- CG.Verbose
        , CG.maxItersFac = 50 / fromIntegral thetaSize
        }
      grad_tol = 0.00001
  (theta0 :: VS.Vector Double) <- randTheta
  (theta, _result1, _stat1) <- CG.optimize params grad_tol theta0
    (CG.VFunction (fst . cost lambda samples))
    (CG.VGradient (snd . cost lambda samples))
    (Just (CG.VCombined (cost lambda samples)))
  return theta

randTheta :: VG.Vector v Double => IO (v Double)
randTheta = do
  gen <- Rand.createSystemRandom
  VG.replicateM thetaSize (Rand.uniformR (-epsilonInit, epsilonInit) gen)

main :: IO ()
main = do
  let trainSize = 200
  samples <- loadData
  (trainData, testData) <- do
    gen <- Rand.createSystemRandom
    samples2 <- Rand.uniformShuffle (V.fromList samples) gen
    return (take trainSize (V.toList samples2), drop trainSize (V.toList samples2))
  let lambda = 1
  theta <- train lambda trainData
  printf "precision (training data): %f\n" $ sum [1::Double | (x,y) <- trainData, predict theta x == y] / fromIntegral (length trainData)
  printf "precision (test data): %f\n" $ sum [1::Double | (x,y) <- testData, predict theta x == y] / fromIntegral (length testData)