module NeuralNet (newFeedForward, predict, train, layers, activation, loss) where
-- A Feed Forward Neural Network implementation
import Data.Matrix
import System.Random
type Layer = Matrix Double
type Delta = Matrix Double
data Activation = Sigmoid | Tanh deriving Show
data Loss = SquaredError deriving Show
data NeuralNet = FeedForward [Layer] Activation Loss deriving Show
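-- e.g. the net built from the architecture [2, 3, 1] holds two weight
-- matrices: a 3x2 matrix for the hidden layer and a 1x3 matrix for the output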
-------------------------------------------------------------------------------
------------------------- Activation and Loss Functions -----------------------
-------------------------------------------------------------------------------
-- activation function
actFunction :: Activation -> Double -> Double
actFunction Sigmoid x = 1 / (1 + e ** (-x)) where e = exp 1
actFunction Tanh x = (e ** n - 1) / (e ** n + 1)
  where e = exp 1
        n = 2 * x
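-- note: (e ** 2x - 1) / (e ** 2x + 1) is algebraically equal to tanh x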
actFunctionV :: Activation -> Matrix Double -> Matrix Double
actFunctionV Sigmoid m = mapMatrix (actFunction Sigmoid) m
actFunctionV Tanh m = mapMatrix (actFunction Tanh) m
-- activation function's derivative
actDerivative :: Activation -> Double -> Double
actDerivative Sigmoid x = s * (1 - s) where s = actFunction Sigmoid x
actDerivative Tanh x = 1 - (actFunction Tanh x) ^ 2
actDerivativeV :: Activation -> Matrix Double -> Matrix Double
actDerivativeV Sigmoid m = elementwise (*) s (mapMatrix (1-) s)
  where s = actFunctionV Sigmoid m
actDerivativeV Tanh m = mapMatrix (1-) (mapMatrix (^2) (actFunctionV Tanh m))
-- loss function
lossFunction :: Loss -> Matrix Double -> Matrix Double -> Matrix Double
lossFunction SquaredError m1 m2 = mapMatrix (\x -> (x ^ 2) / 2) (m1 - m2)
-- loss function's derivative
lossDerivative :: Loss -> Matrix Double -> Matrix Double -> Matrix Double
lossDerivative SquaredError m1 m2 = m1 - m2
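-- Example: for a prediction a = [0.8, 0.2] and target y = [1.0, 0.0],
-- lossFunction SquaredError gives [(-0.2)^2 / 2, 0.2^2 / 2] = [0.02, 0.02]
-- and lossDerivative gives a - y = [-0.2, 0.2]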
-------------------------------------------------------------------------------
-------------------------- Feed Forward Neural Network ------------------------
-------------------------------------------------------------------------------
-- a feed forward neural network with initialized weights
newFeedForward :: [Int] -> Activation -> Loss -> NeuralNet
newFeedForward [] _ _ = error "No architecture defined"
newFeedForward (n:ns) a l = FeedForward (buildLayers ns n) a l
-- layers with initialized weights
buildLayers :: [Int] -> Int -> [Layer]
buildLayers [] _ = []
buildLayers (n:ns) x = (matrix n x (const rnd)):(buildLayers ns n)
  where rnd = 1 -- constant init: getStdRandom (randomR (0.0, 1.0)) would need IO
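-- A minimal sketch of random initialization (newFeedForwardIO and
-- buildLayersIO are hypothetical helpers, not part of the export list):
-- since getStdRandom lives in IO, randomly drawn weights must be built in IO
newFeedForwardIO :: [Int] -> Activation -> Loss -> IO NeuralNet
newFeedForwardIO [] _ _ = error "No architecture defined"
newFeedForwardIO (n:ns) a l = fmap (\ls -> FeedForward ls a l) (buildLayersIO ns n)
buildLayersIO :: [Int] -> Int -> IO [Layer]
buildLayersIO [] _ = return []
buildLayersIO (n:ns) x = do
  ws <- mapM (const (getStdRandom (randomR (0.0, 1.0)))) [1 .. n * x]
  rest <- buildLayersIO ns n
  return ((fromList n x ws):rest)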
-- layers accessor
layers :: NeuralNet -> [Layer]
layers (FeedForward ls _ _) = ls
-- activation function accessor
activation :: NeuralNet -> Activation
activation (FeedForward _ a _) = a
-- loss function accessor
loss :: NeuralNet -> Loss
loss (FeedForward _ _ l) = l
-- predicts an output for a given input
predict :: Matrix Double -> NeuralNet -> Matrix Double
-- Precondition: input is a column vector
predict _ (FeedForward [] _ _) = error "Cannot predict with an empty network"
predict m (FeedForward ls a _) = last (forward a m ls)
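-- Example: with nn = newFeedForward [2, 3, 1] Sigmoid SquaredError,
-- predict (fromLists [[0.5], [1.0]]) nn feeds a 2x1 column vector through
-- the network and yields a 1x1 output matrix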
-- trains the network for n epochs over the given training pairs
-- Precondition: xs and ys have the same length and are not empty
train :: Int -> [Matrix Double] -> [Matrix Double] -> Double -> NeuralNet -> NeuralNet
train n xs ys eta nn = last (nn:(trainN n xs ys eta nn)) -- nn prepended so n = 0 returns the untrained net
-- trains the network on each (input, expected output) pair in turn,
-- returning every intermediate network
train' :: [Matrix Double] -> [Matrix Double] -> Double -> NeuralNet -> [NeuralNet]
train' [] _ _ _ = []
train' (x:xs) (y:ys) eta nn = trainedNN:(train' xs ys eta trainedNN)
  where trainedNN = backprop x y eta nn
-- trains the network once over the whole training set
trainOneEpoch :: [Matrix Double] -> [Matrix Double] -> Double -> NeuralNet -> NeuralNet
trainOneEpoch xs ys eta = last . (train' xs ys eta)
-- trains the network for n epochs, returning the network after each epoch
trainN :: Int -> [Matrix Double] -> [Matrix Double] -> Double -> NeuralNet -> [NeuralNet]
trainN 0 _ _ _ _ = []
trainN n xs ys eta nn = epochTrainedNN:(trainN (n - 1) xs ys eta epochTrainedNN)
  where epochTrainedNN = trainOneEpoch xs ys eta nn
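-- Example (with x1, x2, y1, y2 standing for column-vector matrices):
-- train 100 [x1, x2] [y1, y2] 0.1 nn runs 100 epochs of online gradient
-- descent over the two training pairs with learning rate 0.1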
-- feed forward algorithm
forward :: Activation -> Matrix Double -> [Layer] -> [Matrix Double]
-- Precondition 1: layers is not empty
-- Precondition 2: input is a column vector
forward a m ls = m:(map (mapMatrix (actFunction a)) (weightedSums a m ls))
weightedSums :: Activation -> Matrix Double -> [Layer] -> [Matrix Double]
-- Precondition 1: layers is not empty
-- Precondition 2: input is a column vector
weightedSums a m [] = []
weightedSums a m (l:ls) = out:(weightedSums a (mapMatrix (actFunction a) out) ls)
  where out = l * m
-- helper function to return the weighted sums for a given input in reverse order
-- (Useful for backpropagation functions)
reverseZs :: Activation -> Matrix Double -> [Layer] -> [Matrix Double]
reverseZs actF x ls = reverse (weightedSums actF x ls)
-- trains the network given one input vector
backprop :: Matrix Double -> Matrix Double -> Double -> NeuralNet -> NeuralNet
backprop _ _ _ (FeedForward [] _ _) = error "Cannot backpropagate on empty network"
backprop x y eta (FeedForward ls actF errF) = FeedForward (gradientDescent eta grad ls) actF errF
  where grad = gradient (init (forward actF x ls))
                        (backwards (reverseZs actF x ls) y actF errF (reverse ls))
-- Calculates deltas for each layer, from the output layer backwards
backwards :: [Matrix Double] -> Matrix Double -> Activation -> Loss -> [Layer] -> [Delta]
-- Precondition 1: weighted sums and layers are not empty
-- Precondition 2: weighted sums and layers are both in reverse order
-- Precondition 3: there are as many weighted sums as layers
backwards (z:zs) y actF errF ls = reverse (d:(backwardDeltas zs d actF ls))
  where d = outputDelta z y actF errF
-- pairs each remaining weighted sum with the weights of the layer above it
backwardDeltas :: [Matrix Double] -> Delta -> Activation -> [Layer] -> [Delta]
backwardDeltas [] _ _ _ = []
backwardDeltas _ _ _ [] = []
backwardDeltas (z:zs) dnext actF (l:ls) = d:(backwardDeltas zs d actF ls)
  where d = hiddenDelta l dnext z actF
-- delta at the output layer: the loss derivative is evaluated at the
-- activation a(z) (not at a'(z)), then multiplied elementwise by a'(z)
outputDelta :: Matrix Double -> Matrix Double -> Activation -> Loss -> Delta
outputDelta z y actF errF = elementwise (*) (j' (a z) y) (a' z)
  where a = actFunctionV actF
        a' = actDerivativeV actF
        j' = lossDerivative errF
hiddenDelta :: Layer -> Delta -> Matrix Double -> Activation -> Delta
hiddenDelta lnext dnext z actF = elementwise (*) ((transpose lnext) * dnext) (actDerivativeV actF z)
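-- In standard notation: delta_L = dJ/da(a_L, y) ⊙ σ'(z_L) at the output, and
-- delta_i = (W_{i+1}^T * delta_{i+1}) ⊙ σ'(z_i) at hidden layers; the weight
-- gradient for layer i is then delta_i * a_{i-1}^T (built by wDerivatives)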
-- all adjusted layers by applying gradient descent
gradientDescent :: Double -> [Matrix Double] -> [Layer] -> [Layer]
gradientDescent _ _ [] = []
gradientDescent _ [] _ = []
gradientDescent eta (g:gs) (l:ls) = (adjustedWeights eta g l):(gradientDescent eta gs ls)
-- applies delta rule given the derivatives of a layer
adjustedWeights :: Double -> Matrix Double -> Layer -> Layer
adjustedWeights eta g l = l - (mapMatrix (*eta) g)
-- gradient of the whole network by multiplying activations and deltas obtained with backprop
gradient :: [Matrix Double] -> [Delta] -> [Matrix Double]
gradient [] _ = []
gradient _ [] = []
gradient (a:as) (d:ds) = (wDerivatives a d):(gradient as ds)
-- computes the derivatives of a matrix of weights given its deltas and activations matrix
wDerivatives :: Matrix Double -> Delta -> Matrix Double
wDerivatives aprev d = fromLists (wDerivativesFromList (toList aprev) (toList d))
-- helper computing the same derivatives as wDerivatives by folding over the
-- deltas (an equivalent explicitly recursive version is kept below)
wDerivativesFromList :: [Double] -> [Double] -> [[Double]]
wDerivativesFromList as = foldr (\d res -> (map (*d) as):res) []
--wDerivativesFromList _ [] = []
--wDerivativesFromList as (d:ds) = (map (*d) as):(wDerivativesFromList as ds)
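-- Example: wDerivativesFromList [a1, a2] [d1, d2, d3]
--   = [[a1*d1, a2*d1], [a1*d2, a2*d2], [a1*d3, a2*d3]], i.e. d * a^T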
-------------------------------------------------------------------------------
----------------------------------- Utils -------------------------------------
-------------------------------------------------------------------------------
-- applies a function to all matrix elements
mapMatrix :: (a -> b) -> Matrix a -> Matrix b
mapMatrix f m = fromLists (map (map f) (toLists m))
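-- Usage sketch (XOR, assuming this module and Data.Matrix are in scope):
--
--   let nn  = newFeedForward [2, 2, 1] Sigmoid SquaredError
--       xs  = map (\p -> fromLists (map (:[]) p)) [[0,0],[0,1],[1,0],[1,1]]
--       ys  = map (\t -> fromLists [[t]]) [0, 1, 1, 0]
--       nn' = train 1000 xs ys 0.5 nn
--   in map (\x -> predict x nn') xs
--
-- Note: with the constant weight initialization in buildLayers, units within
-- a layer stay symmetric, so XOR will not be learned until random
-- initialization (e.g. the newFeedForwardIO sketch above) is used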