Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Learn Burrows-Wheeler indexing (vanilla)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define L 14 // Length of the text.
// Global variables.
char TXT[L] = "GATGCGAGAGATG$";
int SA[L] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13};
char BWT[L] = {0};
int C[4] = {0};
int OCC[4][L] = {0};
int NUM[256] = {
['A'] = 0, ['C'] = 1, ['G'] = 2, ['T'] = 3,
['a'] = 0, ['c'] = 1, ['g'] = 2, ['t'] = 3,
};
int compare_suffixes (const void * a, const void * b) {
const char * suff_a = &TXT[*(int *)a];
const char * suff_b = &TXT[*(int *)b];
return strcmp(suff_a, suff_b);
}
void construct_BWT (void) {
for (int i = 0 ; i < L ; i++) {
if (SA[i] > 0) BWT[i] = TXT[SA[i]-1];
else BWT[i] = '$';
}
}
void construct_C_and_OCC (void) {
for (int i = 0 ; i < L ; i++) {
if (BWT[i] != '$') C[NUM[BWT[i]]]++;
for (int j = 0 ; j < 4 ; j++) OCC[j][i] = C[j];
}
int tmp = 1;
for (int j = 0 ; j < 4 ; j++) {
tmp += C[j];
C[j] = tmp - C[j];
}
}
void backward_search (char * query) {
int bot = 1;
int top = L-1;
for (int pos = strlen(query)-1 ; pos > -1 ; pos--) {
int c = NUM[query[pos]];
bot = C[c] + OCC[c][bot-1];
top = C[c] + OCC[c][top]-1;
if (top < bot) break;
}
for (int i = bot ; i <= top ; i++) {
printf("%s: %d\n", query, SA[i]);
}
}
int main(void) {
qsort(SA, L, sizeof(int), compare_suffixes);
construct_BWT();
construct_C_and_OCC();
backward_search("GAGA");
}
@gui11aume

This comment has been minimized.

Copy link
Owner Author

gui11aume commented May 7, 2017

See blog post commenting the code at the following link.
http://blog.thegrandlocus.com/2017/05/a-tutorial-on-burrows-wheeler-indexing-methods-2

Compile with gcc -std=c99 -g -O0 learn_bwt_indexing_vanilla.c -o run and execute with ./run.
Start debug session with gdb run. In the session type b main and then r.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.