Skip to content

Instantly share code, notes, and snippets.

Created April 24, 2010 16:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/718773e6176ce1bd8d71 to your computer and use it in GitHub Desktop.
Save anonymous/718773e6176ce1bd8d71 to your computer and use it in GitHub Desktop.
\documentclass{beamer}
\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{verbatim}
\usepackage{url}
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{footline}[page number]
\title{About optimal sequence alignment}
\subtitle{A short glimpse into bioinformatics}
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\frametitle{Pairwise sequence alignment}
Assumptions:
\begin{itemize}
\item sequences $S_1$ and $S_2$ are homologous, they share a common ancestor;
\item differences between them are due to only two kinds of events, substitutions and insertion-deletions.
\end{itemize}
Strategy:
\begin{itemize}
\item choose a scoring matrix (reward for match, penalty for mismatch and gap);
\item compute the editing distance (number of matches, mismatches and gaps) to go from one sequence to the other;
\item keep the alignment with the highest score.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Needleman-Wunsch algorithm}
\begin{itemize}
\item Aim: find the optimal global alignment of sequences $S_1$ and $S_2$
\item Recursion rule:
\begin{align}
D(i,j) = max
\begin{cases}
D(i-1,j-1) + score( S_1[i], S_2[j] )\\
D(i-1,j) + gap\\
D(i,j-1) + gap
\end{cases}
\end{align}
\item Scoring scheme: identity=0 transition=-2 transversion=-5 gap=-10
\item Sequences: $S_1$=TTGT $S_2$=CTAGG
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Fill the matrix}
\begin{tabular}{l|cccccccccccc}
& & & C & & T & & A & & G & & G \\ \hline
& & & & & & & & & & & & \\
& \visible<2->{0} & \visible<3->{$\rightarrow$} & \visible<3->{-10} & \visible<4->{$\rightarrow$} & \visible<4->{-20} & \visible<5->{$\rightarrow$} & \visible<5->{-30} & \visible<5->{$\rightarrow$} & \visible<5->{-40} & \visible<5->{$\rightarrow$} & \visible<5->{-50} \\
& \visible<6->{$\downarrow$} & \visible<7->{$\searrow$} & \visible<7->{$\downarrow$} & \visible<9->{$\searrow$} & \visible<9->{$\downarrow$} & \visible<9->{$\searrow$} & \visible<9->{$\downarrow$} & \visible<9->{$\searrow$} & \visible<9->{$\downarrow$} & \visible<9->{$\searrow$} & \visible<9->{$\downarrow$} & \\
T & \visible<6->{-10} & \visible<7->{$\rightarrow$} & \visible<8->{-2} & \visible<9->{$\rightarrow$} & \visible<9->{-10} & \visible<9->{$\rightarrow$} & \visible<9->{-20} & \visible<9->{$\rightarrow$} & \visible<9->{-30} & \visible<9->{$\rightarrow$} & \visible<9->{-40} \\
& \visible<6->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \\
T & \visible<6->{-20} & \visible<10->{$\rightarrow$} & \visible<11->{-12} & \visible<10->{$\rightarrow$} & \visible<11->{-2} & \visible<10->{$\rightarrow$} & \visible<11->{-12} & \visible<10->{$\rightarrow$} & \visible<11->{-22} & \visible<10->{$\rightarrow$} & \visible<11->{-32} & \\
& \visible<6->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \\
G & \visible<6->{-30} & \visible<10->{$\rightarrow$} & \visible<11->{-22} & \visible<10->{$\rightarrow$} & \visible<11->{-12} & \visible<10->{$\rightarrow$} & \visible<11->{-4} & \visible<10->{$\rightarrow$} & \visible<11->{-12} & \visible<10->{$\rightarrow$} & \visible<11->{-22} & \\
& \visible<6->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \\
T & \visible<6->{-40} & \visible<10->{$\rightarrow$} & \visible<11->{-32} & \visible<10->{$\rightarrow$} & \visible<11->{-22} & \visible<10->{$\rightarrow$} & \visible<11->{-14} & \visible<10->{$\rightarrow$} & \visible<11->{-9} & \visible<10->{$\rightarrow$} & \visible<11->{-17} &
\end{tabular}
\end{frame}
\begin{frame}
\frametitle{Traceback}
\begin{tabular}{l|cccccccccccc}
& & & C & & T & & A & & G & & G \\ \hline
& & & & & & & & & & & & \\
& \color<7->{red}{0} & $\rightarrow$ & -10 & $\rightarrow$ & -20 & $\rightarrow$ & -30 & $\rightarrow$ & -40 & $\rightarrow$ & -50 \\
& $\downarrow$ & \color<7->{red}{$\searrow$} & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & \\
T & -10 & $\rightarrow$ & \color<6->{red}{-2} & $\rightarrow$ & -10 & $\rightarrow$ & -20 & $\rightarrow$ & -30 & $\rightarrow$ & -40 \\
& $\downarrow$ & $\searrow$ & $\downarrow$ & \color<6->{red}{$\searrow$} & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & \\
T & -20 & $\rightarrow$ & -12 & $\rightarrow$ & \color<5->{red}{-2} & \color<5->{red}{$\rightarrow$} & \color<4->{red}{-12} & $\rightarrow$ & -22 & $\rightarrow$ & -32 & \\
& $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & \color<4->{red}{$\searrow$} & $\downarrow$ & $\searrow$ & $\downarrow$ & \\
G & -30 & $\rightarrow$ & -22 & $\rightarrow$ & -12 & $\rightarrow$ & -4 & $\rightarrow$ & \color<3->{red}{-12} & $\rightarrow$ & -22 & \\
& $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & \color<3->{red}{$\searrow$} & $\downarrow$ & \\
T & -40 & $\rightarrow$ & -32 & $\rightarrow$ & -22 & $\rightarrow$ & -14 & $\rightarrow$ & -9 & $\rightarrow$ & \color<2->{red}{-17} &
\end{tabular}
\end{frame}
\begin{frame}[fragile]
\frametitle{Output}
Plot the optimal alignment:
\begin{verbatim}
CTAGG
*| |*
TT-GT
\end{verbatim}
Score: -17
Complexity in time: $O(nm)$
Complexity in memory: $O(nm)$
\end{frame}
\begin{frame}
\frametitle{Acknowledgments}
Bellman; Levenstein; Needleman and Wunsch; Sankoff and Sellers; Hirschberg; Smith and Waterman; Gotoh; Ukkonen, Myers and Fickett; and many others...
\vspace{1cm}
Want to know more? start reading!
\url{http://lectures.molgen.mpg.de/online_lectures.html}
\end{frame}
\end{document}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment