Derivation of "backpropagation of error" formula for a Multi-Layer-Perceptron, written in LaTeX
\documentclass[10pt, a4paper]{article}
\usepackage[english]{babel}
\usepackage{amsmath, amssymb, wasysym}
\usepackage{textcomp}
\usepackage{graphicx} % Graphics package
%\usepackage{graphs} % c.f. http://www8.cs.umu.se/~drewes/graphs/
\usepackage{tikz}
\usepackage[T1]{fontenc}
% \usepackage[latin9]{inputenc} % Encoding
\usepackage[utf8]{inputenc} % Encoding
\usepackage{hyperref}
\usepackage{caption} % Package in order to change image captions
\usepackage{rotating} % Package for rotating elements
\usepackage{color}
\usepackage{verbatim}
\hypersetup{pdfborder={0 0 0}}
% Slightly increase the spacing after figure captions
%\addtolength{\belowcaptionskip}{0.2cm}
% Set the list item symbol
\renewcommand{\labelitemii}{$\diamond$}
% Change the format of figure captions
\renewcommand{\captionfont}{\small\itshape}
% Set the font
\renewcommand{\familydefault}{\sfdefault}
\definecolor{red}{rgb}{0.4, 0.0, 0.0}
\definecolor{green}{rgb}{0.0, 0.4, 0.0}
\definecolor{blue}{rgb}{0.0, 0.0, 0.4}
\definecolor{magenta}{rgb}{0.4, 0.4, 0.0}
\definecolor{orange}{rgb}{0.2, 0.2, 0.0}
% Set the font for headings
%\setkomafont{sectioning}{\bf\rmfamily}
% Set the page margins
\usepackage[top=1cm, left=0.5cm, right=0.5cm, bottom=1.2cm]{geometry}
%% And now it goes looooose :)
\begin{document}
\begin{center}
{\LARGE Derivation \& Operation of Backpropagation of Error}
\end{center}
\begin{itemize}
%\item[]
%\underline{First, the nomenclature of the network excerpt:}
\begin{comment}
\item[]
\underline{Further information:}
\\
This document describes how an artificial neural network of the multi-layer perceptron type is trained with backpropagation of error.
\\
Basic knowledge is assumed; interested readers from other fields are referred to:
\\
\url{http://dkriesel.com/science/neural_networks}.
\item[]
\underline{Network topology:}
\def\layersep{2.5cm}
\begin{tikzpicture}[shorten >=1pt,->,draw=black!50, node distance=\layersep]
\tikzstyle{every pin edge}=[<-,shorten <=1pt]
\tikzstyle{neuron}=[circle,fill=black!25,minimum size=17pt,inner sep=0pt]
\tikzstyle{input neuron}=[neuron, fill=green!50];
\tikzstyle{output neuron}=[neuron, fill=red!50];
\tikzstyle{hidden neuron}=[neuron, fill=blue!50];
\tikzstyle{annot} = [text width=4em, text centered]
% Draw the input layer nodes
\foreach \name / \y in {1,...,3}
% This is the same as writing \foreach \name / \y in {1/1,2/2,3/3}
\node[input neuron, pin=left:$x_\y$] (I-\name) at (0,-\y cm) {$\diagup$};
\node[] (I-4) at (0,-4 cm) {$\vdots$};
\node[input neuron, pin=left:$x_N$] (I-5) at (0,-5 cm) {$\diagup$};
% Draw the hidden layer nodes
\foreach \name / \y in {1,...,5}
\path[yshift=0.5cm]
node[hidden neuron] (H-\name) at (\layersep,-\y cm) {$\frac{S}{\sum}$};
% Draw the output layer nodes
\foreach \name / \y in {1,...,3}
% \path[yshift=1.0cm]
\node[output neuron,pin={[pin edge={->}]right:$y_\y$}, right of=H-3] (Y-\name) at (\layersep,-\y cm) {};
\node[] (Y-4) at (2*\layersep,-4 cm) {$\vdots$};
\node[output neuron,pin={[pin edge={->}]right:$y_M$}] (Y-5) at (2*\layersep,-5 cm) {};
% Connect every node in the input layer with every node in the
% hidden layer.
\foreach \source in {1,...,3}
\foreach \dest in {1,...,5}
\path (I-\source) edge (H-\dest);
\foreach \source in {5,...,5}
\foreach \dest in {1,...,5}
\path (I-\source) edge (H-\dest);
% Connect every node in the hidden layer with the output layer
\foreach \source in {1,...,5}
\foreach \dest in {1,...,3,5}
\path (H-\source) edge (Y-\dest);
% Annotate the layers
\node[annot,above of=H-1, node distance=1cm] (hl) {Hidden layer};
\node[annot,left of=hl] {Input layer};
\node[annot,right of=hl] {Output layer};
\end{tikzpicture}
\end{comment}
\item[]
\underline{Error function:}
\begin{align*}
F &= \sum_{p \in P} \ ^pE \ \ \ \text{with $p$ being a pattern in the training set $P$} \\
^p E &= \frac{1}{2} \sum_{m=1}^{M} \left(\hat{y}_m - y_m \right)^2 \ \ \ \text{with $\hat{y}_m$ being the teacher value for the $m$-th output $y_m$}
\end{align*}
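For example, assuming $M = 2$ outputs with teacher values $\hat{y} = (1, 0)$ and actual outputs $y = (0.8, 0.3)$ for one pattern $p$ (values chosen only for illustration):
\begin{align*}
^p E &= \frac{1}{2} \left[ (1 - 0.8)^2 + (0 - 0.3)^2 \right] = \frac{1}{2} \left[ 0.04 + 0.09 \right] = 0.065
\end{align*}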
\item[]
\underline{Weight update rule:}
\begin{align*}
^p \vartriangle w_{hm} &\sim - \nabla_w \, ^p E \ \ \ \text{with $w_{hm}$ being the weight from neuron $h$ to neuron $m$} \\
\vartriangle w_{hm} &= - \eta \textcolor{red}{\frac{\partial E \left( w_{hm} \right) }{ \partial w_{hm}} } \ \ \ \text{with $\eta$ being the learning rate}
\end{align*}
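For instance, assuming a learning rate $\eta = 0.1$ and $\frac{\partial E}{\partial w_{hm}} = -0.5$ (values chosen only for illustration), the update is $\vartriangle w_{hm} = -0.1 \cdot (-0.5) = 0.05$, i.e.\ the weight is moved against the gradient, in the direction that decreases the error.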
\item[]
\underline{For output neurons:}
\\
$net_m = \sum_{i=0}^H w_{im} \tilde{o}_i$ \\
$o_m = y_m = f_m(net_m) $ \\
$ \textcolor{red}{\frac{\partial E \left( w_{hm} \right) }{ \partial w_{hm} }} = \textcolor{magenta}{\frac{\partial E }{ \partial net_m }} \cdot \textcolor{green}{\frac{ \partial net_m }{ \partial w_{hm} }}$
\begin{align*}
\textcolor{green}{\frac{\partial net_m }{ \partial w_{hm} }} &= \frac{ \partial }{ \partial w_{hm} } net_m \\
&= \frac{ \partial }{ \partial w_{hm} } \sum_{i=0}^H w_{im} \tilde{o}_i \\
&= \sum_{i=0}^H \frac{ \partial }{ \partial w_{hm} } w_{im} \tilde{o}_i \\
&= \frac{ \partial }{ \partial w_{hm} } \tilde{o}_h w_{hm} \\
&= \text{\textcolor{green}{\framebox{$ \tilde{o}_h $}}} \\
\\
\textcolor{magenta}{\frac{\partial E }{ \partial net_m }} &= \frac{ \partial E }{ \partial y_m } \cdot \textcolor{orange}{\frac{ \partial y_m }{ \partial net_m }} \ \left( = \textcolor{magenta}{- \delta_m} \right) \\
&= \frac{ \partial E }{ \partial y_m } \cdot \frac{ \partial }{ \partial net_m } f_m(net_m) \\
&= \underbrace{\textcolor{blue}{\frac{ \partial E }{ \partial y_m }} \cdot \text{\textcolor{orange}{\framebox{$ f'_m (net_m) $}}}}_{=: -\delta_m} \\
\\
\textcolor{blue}{\frac{ \partial E }{ \partial y_m }} &= \frac{ \partial }{ \partial y_m } \frac{1}{2} \sum_{j=1}^M \left( \hat{y}_j - y_j \right)^2 \\
&= \text{\textcolor{blue}{\framebox{$ - (\hat{y}_m - y_m) $}}} \\
\end{align*}
\begin{align*}
\vartriangle w_{hm} &= - \eta \textcolor{red}{\frac{ \partial E }{ \partial w_{hm} }} \\
&= \textcolor{blue}{-} \eta \textcolor{blue}{\frac{ \partial E }{ \partial y_m }} \textcolor{orange}{\frac{ \partial y_m}{ \partial net_m}} \textcolor{green}{\frac{ \partial net_m}{ \partial w_{hm}}} \\
&= \eta \textcolor{blue}{\left( \hat{y}_m - y_m \right)} \textcolor{orange}{f'_m(net_m)} \textcolor{green}{\tilde{o}_h}
\end{align*}
\framebox{$\delta_m = \textcolor{blue}{\left( \hat{y}_m - y_m \right)} \cdot \textcolor{orange}{f'_m(net_m)}$}
\framebox{$ \vartriangle w_{hm} = \eta \cdot \delta_m \cdot \tilde{o}_h $}
This is the Widrow-Hoff rule ($\delta$-rule).
\\
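As a numerical illustration, assume a logistic activation $f(net) = \frac{1}{1 + e^{-net}}$, so that $f'(net) = f(net)\left(1 - f(net)\right)$, and assume $net_m = 0.5$, $\hat{y}_m = 1$, $\tilde{o}_h = 0.8$, $\eta = 0.5$ (values chosen only for illustration):
\begin{align*}
y_m &= f(0.5) \approx 0.62 \\
f'(net_m) &\approx 0.62 \cdot (1 - 0.62) \approx 0.24 \\
\delta_m &= (1 - 0.62) \cdot 0.24 \approx 0.09 \\
\vartriangle w_{hm} &= 0.5 \cdot 0.09 \cdot 0.8 \approx 0.036
\end{align*}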
\item[]
\underline{For hidden neurons:}
\\
$net_h = \sum_{k=0}^{N} w_{kh} \tilde{o}_k$ \\
$ \textcolor{red}{\frac{\partial E }{ \partial w_{kh} }} = \textcolor{magenta}{\frac{\partial E }{ \partial net_h }} \cdot \textcolor{green}{\frac{ \partial net_h }{ \partial w_{kh} } }$
\begin{align*}
\delta_h &= - \textcolor{magenta}{\frac{ \partial E }{ \partial net_h}} \\
&= \textcolor{blue}{- \frac{ \partial E }{ \partial o_h}} \cdot \textcolor{orange}{\frac{ \partial o_h }{ \partial net_h}} \\
\\
\textcolor{blue}{- \frac{ \partial E }{ \partial o_h}} &= - \frac{ \partial E \left( \underline{net}_{l=1} , \underline{net}_{l=2} , \hdots , \underline{net}_{l=L} \right) }{ \partial o_h} \\
&= \sum_{l=1}^L \left( - \frac{ \partial E}{ \partial \underline{net}_l } \right) \cdot \frac{ \partial \underline{net}_l }{ \partial o_h} \\
&= \sum_{l=1}^L \underline{\delta}_l \cdot \frac{ \partial }{ \partial o_h} \sum_{j=0}^H \underline{w}_{jl} \cdot o_j \\
&= \textcolor{blue}{\sum_{l=1}^L \underline{\delta}_l \cdot \underline{w}_{hl}}
\end{align*}
\framebox{$\delta_h = \textcolor{blue}{\sum_{l=1}^L \left( \underline{\delta}_l \cdot \underline{w}_{hl} \right)} \cdot \textcolor{orange}{f'\left( net_h \right)}$}
\framebox{$ \vartriangle w_{kh} = \eta \cdot \delta_h \cdot \tilde{o}_k $}
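Continuing the illustration, assume $L = 2$ output neurons with $\underline{\delta}_1 = 0.09$, $\underline{\delta}_2 = -0.04$, connecting weights $\underline{w}_{h1} = 0.6$, $\underline{w}_{h2} = -0.2$, and $f'(net_h) = 0.24$, $\tilde{o}_k = 0.1$, $\eta = 0.5$ (values chosen only for illustration):
\begin{align*}
\delta_h &= \left( 0.6 \cdot 0.09 + (-0.2) \cdot (-0.04) \right) \cdot 0.24 = 0.062 \cdot 0.24 \approx 0.015 \\
\vartriangle w_{kh} &= 0.5 \cdot 0.015 \cdot 0.1 \approx 0.00075
\end{align*}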
\begin{comment}
\item[]
\underline{Contact:}
\\
hendriks@cs.uni-bonn.de
\end{comment}
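\item[]
\underline{Implementation sketch:}
\\
A minimal NumPy sketch of one training step for a single-hidden-layer network as derived above; the logistic activation, the omission of bias weights, and all variable names are assumptions chosen only for illustration.
\begin{verbatim}
import numpy as np

def f(net):            # logistic activation
    return 1.0 / (1.0 + np.exp(-net))

def f_prime(net):      # derivative of the logistic activation
    s = f(net)
    return s * (1.0 - s)

rng = np.random.default_rng(0)
N, H, M = 3, 5, 2      # input, hidden, output layer sizes
eta = 0.5              # learning rate

W_hid = rng.normal(scale=0.5, size=(H, N))   # weights w_{kh}
W_out = rng.normal(scale=0.5, size=(M, H))   # weights w_{hm}

x     = np.array([0.1, 0.7, 0.3])            # one input pattern p
y_hat = np.array([1.0, 0.0])                 # teacher output

# forward pass
net_h = W_hid @ x
o_h   = f(net_h)
net_m = W_out @ o_h
y     = f(net_m)

# backward pass: output deltas, then hidden deltas
delta_m = (y_hat - y) * f_prime(net_m)           # delta-rule
delta_h = (W_out.T @ delta_m) * f_prime(net_h)   # sum_l delta_l w_{hl}

# weight updates: Delta w = eta * delta * o
W_out += eta * np.outer(delta_m, o_h)
W_hid += eta * np.outer(delta_h, x)
\end{verbatim}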
\end{itemize}
\end{document}