-
-
Save anonymous/718773e6176ce1bd8d71 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
\documentclass{beamer} | |
\usepackage[utf8]{inputenc} | |
\usepackage{amsmath} | |
\usepackage{verbatim} | |
\usepackage{url} | |
\setbeamertemplate{navigation symbols}{} | |
\setbeamertemplate{footline}[page number] | |
\title{About optimal sequence alignment} | |
\subtitle{A short glimpse into bioinformatics} | |
\begin{document} | |
\begin{frame} | |
\titlepage | |
\end{frame} | |
\begin{frame} | |
\frametitle{Pairwise sequence alignment} | |
Assumptions: | |
\begin{itemize} | |
\item sequences $S_1$ and $S_2$ are homologous, they share a common ancestor; | |
\item differences between them are due to only two kinds of events, substitutions and insertion-deletions. | |
\end{itemize} | |
Strategy: | |
\begin{itemize} | |
\item choose a scoring matrix (reward for match, penalty for mismatch and gap); | |
\item compute the editing distance (number of matches, mismatches and gaps) to go from one sequence to the other; | |
\item keep the alignment with the highest score. | |
\end{itemize} | |
\end{frame} | |
\begin{frame} | |
\frametitle{Needleman-Wunsch algorithm} | |
\begin{itemize} | |
\item Aim: find the optimal global alignment of sequences $S_1$ and $S_2$ | |
\item Recursion rule: | |
\begin{align} | |
D(i,j) = max | |
\begin{cases} | |
D(i-1,j-1) + score( S_1[i], S_2[j] )\\ | |
D(i-1,j) + gap\\ | |
D(i,j-1) + gap | |
\end{cases} | |
\end{align} | |
\item Scoring scheme: identity=0 transition=-2 transversion=-5 gap=-10 | |
\item Sequences: $S_1$=TTGT $S_2$=CTAGG | |
\end{itemize} | |
\end{frame} | |
\begin{frame} | |
\frametitle{Fill the matrix} | |
\begin{tabular}{l|cccccccccccc} | |
& & & C & & T & & A & & G & & G \\ \hline | |
& & & & & & & & & & & & \\ | |
& \visible<2->{0} & \visible<3->{$\rightarrow$} & \visible<3->{-10} & \visible<4->{$\rightarrow$} & \visible<4->{-20} & \visible<5->{$\rightarrow$} & \visible<5->{-30} & \visible<5->{$\rightarrow$} & \visible<5->{-40} & \visible<5->{$\rightarrow$} & \visible<5->{-50} \\ | |
& \visible<6->{$\downarrow$} & \visible<7->{$\searrow$} & \visible<7->{$\downarrow$} & \visible<9->{$\searrow$} & \visible<9->{$\downarrow$} & \visible<9->{$\searrow$} & \visible<9->{$\downarrow$} & \visible<9->{$\searrow$} & \visible<9->{$\downarrow$} & \visible<9->{$\searrow$} & \visible<9->{$\downarrow$} & \\ | |
T & \visible<6->{-10} & \visible<7->{$\rightarrow$} & \visible<8->{-2} & \visible<9->{$\rightarrow$} & \visible<9->{-10} & \visible<9->{$\rightarrow$} & \visible<9->{-20} & \visible<9->{$\rightarrow$} & \visible<9->{-30} & \visible<9->{$\rightarrow$} & \visible<9->{-40} \\ | |
& \visible<6->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \\ | |
T & \visible<6->{-20} & \visible<10->{$\rightarrow$} & \visible<11->{-12} & \visible<10->{$\rightarrow$} & \visible<11->{-2} & \visible<10->{$\rightarrow$} & \visible<11->{-12} & \visible<10->{$\rightarrow$} & \visible<11->{-22} & \visible<10->{$\rightarrow$} & \visible<11->{-32} & \\ | |
& \visible<6->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \\ | |
G & \visible<6->{-30} & \visible<10->{$\rightarrow$} & \visible<11->{-22} & \visible<10->{$\rightarrow$} & \visible<11->{-12} & \visible<10->{$\rightarrow$} & \visible<11->{-4} & \visible<10->{$\rightarrow$} & \visible<11->{-12} & \visible<10->{$\rightarrow$} & \visible<11->{-22} & \\ | |
& \visible<6->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \visible<10->{$\searrow$} & \visible<10->{$\downarrow$} & \\ | |
T & \visible<6->{-40} & \visible<10->{$\rightarrow$} & \visible<11->{-32} & \visible<10->{$\rightarrow$} & \visible<11->{-22} & \visible<10->{$\rightarrow$} & \visible<11->{-14} & \visible<10->{$\rightarrow$} & \visible<11->{-9} & \visible<10->{$\rightarrow$} & \visible<11->{-17} & | |
\end{tabular} | |
\end{frame} | |
\begin{frame} | |
\frametitle{Traceback} | |
\begin{tabular}{l|cccccccccccc} | |
& & & C & & T & & A & & G & & G \\ \hline | |
& & & & & & & & & & & & \\ | |
& \color<7->{red}{0} & $\rightarrow$ & -10 & $\rightarrow$ & -20 & $\rightarrow$ & -30 & $\rightarrow$ & -40 & $\rightarrow$ & -50 \\ | |
& $\downarrow$ & \color<7->{red}{$\searrow$} & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & \\ | |
T & -10 & $\rightarrow$ & \color<6->{red}{-2} & $\rightarrow$ & -10 & $\rightarrow$ & -20 & $\rightarrow$ & -30 & $\rightarrow$ & -40 \\ | |
& $\downarrow$ & $\searrow$ & $\downarrow$ & \color<6->{red}{$\searrow$} & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & \\ | |
T & -20 & $\rightarrow$ & -12 & $\rightarrow$ & \color<5->{red}{-2} & \color<5->{red}{$\rightarrow$} & \color<4->{red}{-12} & $\rightarrow$ & -22 & $\rightarrow$ & -32 & \\ | |
& $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & \color<4->{red}{$\searrow$} & $\downarrow$ & $\searrow$ & $\downarrow$ & \\ | |
G & -30 & $\rightarrow$ & -22 & $\rightarrow$ & -12 & $\rightarrow$ & -4 & $\rightarrow$ & \color<3->{red}{-12} & $\rightarrow$ & -22 & \\ | |
& $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & $\searrow$ & $\downarrow$ & \color<3->{red}{$\searrow$} & $\downarrow$ & \\ | |
T & -40 & $\rightarrow$ & -32 & $\rightarrow$ & -22 & $\rightarrow$ & -14 & $\rightarrow$ & -9 & $\rightarrow$ & \color<2->{red}{-17} & | |
\end{tabular} | |
\end{frame} | |
\begin{frame}[fragile] | |
\frametitle{Output} | |
Plot the optimal alignment: | |
\begin{verbatim} | |
CTAGG | |
*| |* | |
TT-GT | |
\end{verbatim} | |
Score: -17 | |
Complexity in time: $O(nm)$ | |
Complexity in memory: $O(nm)$ | |
\end{frame} | |
\begin{frame} | |
\frametitle{Acknowledgments} | |
Bellman; Levenstein; Needleman and Wunsch; Sankoff and Sellers; Hirschberg; Smith and Waterman; Gotoh; Ukkonen, Myers and Fickett; and many others... | |
\vspace{1cm} | |
Want to know more? start reading! | |
\url{http://lectures.molgen.mpg.de/online_lectures.html} | |
\end{frame} | |
\end{document} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment