
\section{Peephole LSTM with Forget Gates in Pseudo-code}
\label{sec:LSTM_FgPH_Pseudo-code}

% Font helpers for the pseudo-code listings in this section.
%
% These are declarations, not grouped text commands: the font and size they
% select stay in effect after the argument, until the enclosing group (for
% example the current tabbing field) ends.  The listings rely on this:
% `\Code{}' with an empty argument is issued before each tabbing environment
% to switch the font for what follows, and text after an \lCmd{...} label is
% reset with an explicit \normalsize where needed.
%
% NFSS declarations replace the obsolete \bf/\it/\tt switches.  With the
% standard LaTeX2e classes each old switch starts with \normalfont, so
% `\bf \tt' selected plain typewriter and `\it \bf' selected plain bold;
% the definitions below spell out that effective result directly.
%
%   \Code  : typewriter, \small       (code text)
%   \Cmd   : bold, \normalsize        (inline label)
%   \lCmd  : bold, \large             (step heading)
%   \llCmd : bold, \Large             (phase heading)
%
% The space at the start of the \lCmd and \llCmd bodies is carried over
% unchanged from the earlier definitions so their expansion stays the same.
\newcommand{\Code}[1]{\normalfont\ttfamily\small #1}
\newcommand{\Cmd}[1]{\normalfont\bfseries\normalsize #1}
\newcommand{\lCmd}[1]{ \normalfont\bfseries\large #1}
\newcommand{\llCmd}[1]{ \normalfont\bfseries\Large #1}

%\Code{this is code}
%\Cmd{a comment}

%\fbox{
%\parbox{5cm}
%\parbox{0.95\textwidth} { 
\Code{}
\begin{tabbing}
\hspace{0.5cm} \= \hspace{3ex} \= \hspace{3ex} \=\hspace{3ex} \= \kill
 \llCmd{init network:}\\
  \>\lCmd{reset:} \Cmd{CECs:} $s_{c_j^v} \! = \!\hat{s}_{c_j^v}  \! = \! 0$;
    \Cmd{partials:} $dS \!=\!0$;
    \Cmd{activations:} $y \! = \! \hat{y} \! = \! 0$;\\
\llCmd{forward pass:} \\
\>\lCmd{input units:} \Code{$y=$ current external input;}\\
\>\lCmd{roll over:} \Cmd{activations:} $\hat{y} \! = \! y$; 
\Cmd{cell states:} $\hat{s}_{c_j^v} \! = \! s_{c_j^v}$;\\ 
%net input and activation in hidden layer\\
\> loop over memory blocks, indexed  $j$ $ \ \{$\\
\> \>\lCmd{Step 1a: input gates} (\ref{equ:z_in}):\\
\> \> \>$z_{{\rm in}_j} = \sum_{m} w_{{\rm in}_jm} \ \hat{y}_m +
\sum_{v=1}^{S_j} w_{{\rm in}_jc_j^v} \ \hat{s}_{c_j^v}$; $\ $
%network input to input gate\\
%\> \> \>
$y_{{\rm in}_{j}} = f_{{\rm in}_j}(z_{{\rm in}_j})$;\\[1ex]
%activation of input gate;\\
\>  \>\lCmd{Step 1b: forget gates} (\ref{equ:z_fg}):\\
\>  \> \>$z_{\varphi_j} = \sum_{m} w_{\varphi_jm} \ \hat{y}_m + \sum_{v=1}^{S_j} w_{\varphi_jc_j^v} \ \hat{s}_{c_j^v}$; $\ $
%network input to forget gate\\
%>  \> \>
$y_{\varphi_{j}} = f_{\varphi_j}(z_{\varphi_j})$;\\[1ex]
%activation of forget gate\\
\>  \>\lCmd{Step 1c: CECs, i.e.\ the cell states}
(\ref{equ:s_c_in}, \ref{equ:s_c_Fg}):\\
\>  \> \>loop over the $S_j$ cells in block $j$, indexed $v$ $ \ \{$\\
\>  \> \> \>$z_{c_j^v} = \sum_m w_{c_j^v m} \ \hat{y}_m$; $\ $
%network input to cell\\
%>  \> \> \>
$s_{c_j^v} = y_{\varphi_j} \ \hat{s}_{c_j^v} +  y_{{\rm in}_{j}} \ g(z_{c_j^v})$; $\ \}$\\[0.5ex]
\>  \>\lCmd{Step 2:}\\
\>  \> \>\Cmd{output gate activation} (\ref{equ:z_out_peep}):\\ 
\>  \> \>$z_{{\rm out}_j} = \sum_{m} w_{{\rm out}_jm} \ \hat{y}_m +
\sum_{v=1}^{S_j} w_{{\rm out}_jc_j^v} \ s_{c_j^v}$; $\ $
%  network input to output gate\\
%>  \> \>
$y_{{\rm out}_{j}} = f_{{\rm out}_j}(z_{{\rm out}_j})$;\\[1ex]
%activation of output gate\\
\>  \> \>\Cmd{cell outputs} (\ref{equ:yCell}):\\
\>  \> \>loop over the $S_j$ cells in block $j$, indexed $v$ $ \ \{ \,$
%\>  \> \> \>
$y_{c_j^v} = y_{{\rm out}_{j}} \ s_{c_j^v}$; $ \, \}$\\
\> $\}$ end loop over memory blocks\\
\> \lCmd{output units} (\ref{equ:OutUnit_StdLSTM}): \normalsize $z_k = \sum_{m} w_{km} \ y_m$; $\ $ $y_k = f_k(z_k)$;\\
\> \lCmd{partial derivatives:}\\
%   \Cmd{for input gates, forget gates and cells}\\
\> loop over memory blocks, indexed  $j$ $ \ \{$\\
\>  \>loop over the $S_j$ cells in block $j$, indexed $v$ $ \ \{$\\
\>  \> \>\Cmd{cells} (\ref{equ:s_partial_s_c}), ($dS^{jv}_{cm} := \frac{\partial s_{c_j^v}}{\partial w_{c_j^vm}}$):\\[1ex]
\>  \> \>$dS^{jv}_{c m} = dS^{jv}_{c m} \ y_{\varphi_j} + g'(z_{c_j^v}) \ y_{{\rm in}_j} \ \hat{y}_m$;\\[1ex]
\>  \> \>\Cmd{input gates} (\ref{equ:s_partial_in}, \ref{equ:s_partial_in_peep}), ($dS^{jv}_{{\rm in},m}
:= \frac{\partial s_{c_j^v}}{\partial w_{{\rm in}_j m}}$ , $dS^{jv}_{{\rm
in},c_j^{v'}} := \frac{\partial s_{c_j^v}}{\partial w_{{\rm in}_j c_j^{v'}}}$):\\[1ex]
%$dS^{jv}_{{\rm in},m} = 0$\\
\>  \> \>$dS^{jv}_{{\rm in},m} = dS^{jv}_{{\rm in},m} \ y_{\varphi_j} +
g(z_{c_j^v}) \ f'_{{\rm in}_j}(z_{{\rm in}_j}) \ \hat{y}_m$;\\[1ex]
\>  \> \>loop over peephole connections from all cells, indexed $v'$ $ \ \{$\\
\>  \> \> \>$dS^{jv}_{{\rm in},c_j^{v'}} = dS^{jv}_{{\rm in},c_j^{v'}} \
y_{\varphi_j} + g(z_{c_j^v}) \ f'_{{\rm in}_j}(z_{{\rm in}_j}) \ \hat{s}_{c_j^{v'}}$; $\ \}$\\[1ex]
\>  \> \>\Cmd{forget gates} (\ref{equ:s_partial_fg}, \ref{equ:s_partial_fg_peep}), ($dS^{jv}_{\varphi m} := \frac{\partial s_{c_j^v}}{\partial w_{\varphi_j m}}$ ,  $dS^{jv}_{\varphi c_j^{v'}} := \frac{\partial s_{c_j^v}}{\partial w_{\varphi_j c_j^{v'}}}$):\\
%$dS^{jv}_{\varphi m} = 0$\\
\>  \> \>$dS^{jv}_{\varphi m} = dS^{jv}_{\varphi m} \ y_{\varphi_j} + \hat{s}_{c_j^v} \ f'_{\varphi_j}(z_{\varphi_j}) \ \hat{y}_m$;\\[1ex]
\>  \> \>loop over peephole connections from all cells, indexed $v'$ $ \ \{$\\
\>  \> \> \>$dS^{jv}_{\varphi c_j^{v'}} = dS^{jv}_{\varphi c_j^{v'}} \ y_{\varphi_j} + \hat{s}_{c_j^v} \ f'_{\varphi_j}(z_{\varphi_j}) \ \hat{s}_{c_j^{v'}}$; $\ \}$\\[1ex]
\> $\}$ \>$\}$ end loops over cells and memory blocks\\
%} %end comment
\end{tabbing}
%} % end parbox
%}

%\newpage

%\fbox{
%\parbox{0.95\textwidth} { 
\Code{}
\begin{tabbing}
\hspace{3ex} \= \hspace{3ex} \= \hspace{3ex} \=\hspace{3ex} \= \kill
\llCmd{backward pass \Cmd{(if error injected)}:} \\[1ex]
\>\lCmd{errors and $\delta$s:}\\[1ex]
 \>\Cmd{injection error:} $e_k = t_k-y_k$;\\[1ex]
% \>\lCmd{$\delta$s}:\\
 \>\Cmd{$\delta$s of output units} (\ref{equ:deltaOut}): $\delta_k = f'_k(z_k) \ e_k$;\\[1ex]
% \> \> non LSTM units (\ref{equ:DeltaHidden}): $\delta_i = f'_i(z_i) \left(\sum_{k} w_{ki} \ \delta_k\right)$\\
 \>loop over memory blocks, indexed  $j$ $ \ \{$\\
 \> \>\Cmd{$\delta$s of output gates} (\ref{equ:delta_OutGate}):\\[0.5ex]
 \> \>$\delta_{{\rm out}_j} = f'_{{\rm out}_j}(z_{{\rm out}_j}) \ \left( \sum_{v =1}^{S_j} \ s_{c_j^v} \sum_{k} w_{kc_j^v} \ \delta_k \right)$;\\[1ex]
 \> \>\Cmd{internal state error} (\ref{equ:e_s}):\\
 \> \>loop over the $S_j$ cells in block $j$, indexed $v$ $ \ \{$\\[0.5ex]
 \> \> \>$e_{s_{c_j^v}} = y_{{\rm out}_j} \ 
\left( \sum_{k} w_{kc_j^v} \ \delta_k \right)$; $ \ \}$\\[1ex]
 \>$\}$ end loop over memory blocks\\[1ex]

\>\lCmd{weight updates:}\\[1ex]
 \>\Cmd{output units} (\ref{equ:dwOut}): $\Delta w_{km} = \alpha \ \delta_k \ y_m$;\\[1ex]
 \>loop over memory blocks, indexed  $j$ $ \ \{$\\
 \> \>\Cmd{output gates} (\ref{equ:dwOutGate}):\\
 \> \>$\Delta w_{{\rm out},m} = \alpha \ \delta_{{\rm out}_j} \ \hat{y}_m$;
% \> \>
$\ $ $\Delta w_{{\rm out},c_j^v} = \alpha \ \delta_{{\rm out}_j} \ s_{c_j^v}$;\\[1ex]
 \> \>\Cmd{input gates} (\ref{equ:dwIn}): \\
 \> \>$\Delta w_{{\rm in},m} = \alpha \sum_{v=1}^{S_j} \ e_{s_{c_j^v}} \ dS^{jv}_{{\rm in},m}$;\\[1ex]
\>  \>loop over peephole connections from all cells, indexed $v'$ $ \ \{$\\
 \> \> \>$\Delta w_{{\rm in},c_j^{v'}} = \alpha \sum_{v=1}^{S_j} \ e_{s_{c_j^v}} \ dS^{jv}_{{\rm in},c_j^{v'}}$; $\ \}$\\[1ex]
 \> \> \Cmd{forget gates} (\ref{equ:dwFg}): \\
 \> \> $\Delta w_{\varphi m} = \alpha  \sum_{v=1}^{S_j} \ e_{s_{c_j^v}} \ dS^{jv}_{\varphi m}$;\\[1ex]
\>  \>loop over peephole connections from all cells, indexed $v'$ $ \ \{$\\
 \> \> \>$\Delta w_{\varphi c_j^{v'}} = \alpha \sum_{v=1}^{S_j} \ e_{s_{c_j^v}} \ dS^{jv}_{\varphi c_j^{v'}}$; $\ \}$\\[1ex]
 \> \>\Cmd{cells} (\ref{equ:dw_s_c}):\\
 \> \>loop over the $S_j$ cells in block $j$, indexed $v$ $ \ \{$\\
 \> \> \>$\Delta w_{c_j^vm} = \alpha \ e_{s_{c_j^v}} \ dS^{jv}_{c m}$; $ \ \}$\\[1ex]
 \>$\}$ end loop over memory blocks
\end{tabbing}
%} % end parbox
%}


