Last active
February 13, 2021 21:35
-
-
Save gui11aume/a0a779ac988da34a6a551fbaddd9cc63 to your computer and use it in GitHub Desktop.
Learn Burrows-Wheeler indexing (vanilla)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#define L 14 // Length of the text. | |
// Global variables. | |
char TXT[L] = "GATGCGAGAGATG$"; | |
int SA[L] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13}; | |
char BWT[L] = {0}; | |
int C[4] = {0}; | |
int OCC[4][L] = {0}; | |
int NUM[256] = { | |
['A'] = 0, ['C'] = 1, ['G'] = 2, ['T'] = 3, | |
['a'] = 0, ['c'] = 1, ['g'] = 2, ['t'] = 3, | |
}; | |
int compare_suffixes (const void * a, const void * b) { | |
const char * suff_a = &TXT[*(int *)a]; | |
const char * suff_b = &TXT[*(int *)b]; | |
return strcmp(suff_a, suff_b); | |
} | |
void construct_BWT (void) { | |
for (int i = 0 ; i < L ; i++) { | |
if (SA[i] > 0) BWT[i] = TXT[SA[i]-1]; | |
else BWT[i] = '$'; | |
} | |
} | |
void construct_C_and_OCC (void) { | |
for (int i = 0 ; i < L ; i++) { | |
if (BWT[i] != '$') C[NUM[BWT[i]]]++; | |
for (int j = 0 ; j < 4 ; j++) OCC[j][i] = C[j]; | |
} | |
int tmp = 1; | |
for (int j = 0 ; j < 4 ; j++) { | |
tmp += C[j]; | |
C[j] = tmp - C[j]; | |
} | |
} | |
void backward_search (char * query) { | |
int bot = 0; | |
int top = L-1; | |
for (int pos = strlen(query)-1 ; pos > -1 ; pos--) { | |
int c = NUM[query[pos]]; | |
bot = C[c] + (bot > 0 ? OCC[c][bot-1] : 0); | |
top = C[c] + OCC[c][top]-1; | |
if (top < bot) break; | |
} | |
for (int i = bot ; i <= top ; i++) { | |
printf("%s: %d\n", query, SA[i]); | |
} | |
} | |
int main(void) { | |
qsort(SA, L, sizeof(int), compare_suffixes); | |
construct_BWT(); | |
construct_C_and_OCC(); | |
backward_search("GAGA"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See blog post commenting the code at the following link.
http://blog.thegrandlocus.com/2017/05/a-tutorial-on-burrows-wheeler-indexing-methods-2
Compile with
gcc -std=c99 -g -O0 learn_bwt_indexing_vanilla.c -o run
and execute with./run
.Start debug session with
gdb run
. In the session typeb main
and thenr
.