Skip to content

Instantly share code, notes, and snippets.

@MurageKibicho
Created June 11, 2024 07:55
Show Gist options
  • Save MurageKibicho/4a0aebfff1e5379efa501ff85b880c1d to your computer and use it in GitHub Desktop.
Save MurageKibicho/4a0aebfff1e5379efa501ff85b880c1d to your computer and use it in GitHub Desktop.
Bijective burrows wheeler C forward transform
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Function to compute the Lyndon factorization using Duval's algorithm
void lyndonFactor(const char *s, char **result, int *count, int maxLen) {
int n = strlen(s);
int k = 0, factorCount = 0;
while (k < n) {
int i = k, j = k + 1;
while (j < n && s[i] <= s[j]) {
if (s[i] == s[j]) {
i++;
} else {
i = k;
}
j++;
}
int oldk = k;
k = k + j - i;
strncpy(result[factorCount], s + oldk, k - oldk);
result[factorCount][k - oldk] = '\0';
factorCount++;
}
*count = factorCount;
}
// Infinite lexicographical order key
void infLexOrderKey(char *a, int l) {
int originalLen = strlen(a);
while (strlen(a) < l) {
strncat(a, a, l - strlen(a));
if (strlen(a) > l) {
a[l] = '\0';
}
}
}
// Function to get all the rotations of the Lyndon factors of s
void lyndonConjugates(const char *s, char **result, int *count, int maxLen) {
char **factors = (char **)malloc(maxLen * sizeof(char *));
for (int i = 0; i < maxLen; i++) {
factors[i] = (char *)malloc(maxLen * sizeof(char));
}
int factorCount = 0;
lyndonFactor(s, factors, &factorCount, maxLen);
int totalCount = 0;
for (int i = 0; i < factorCount; i++) {
int len = strlen(factors[i]);
for (int j = 0; j < len; j++) {
strncpy(result[totalCount], factors[i] + len - j, j);
strncat(result[totalCount], factors[i], len - j);
result[totalCount][len] = '\0';
totalCount++;
}
}
*count = totalCount;
for (int i = 0; i < maxLen; i++) {
free(factors[i]);
}
free(factors);
}
// Comparison function for qsort to sort strings lexicographically considering infinite lex order
int infLexOrderCompare(const void *a, const void *b) {
char *strA = *(char **)a;
char *strB = *(char **)b;
int maxLen = strlen(strA) > strlen(strB) ? strlen(strA) : strlen(strB);
maxLen *= 2; // to safely extend the strings
char *extendedA = (char *)malloc(maxLen + 1);
char *extendedB = (char *)malloc(maxLen + 1);
strncpy(extendedA, strA, maxLen);
strncpy(extendedB, strB, maxLen);
infLexOrderKey(extendedA, maxLen);
infLexOrderKey(extendedB, maxLen);
int result = strcmp(extendedA, extendedB);
free(extendedA);
free(extendedB);
return result;
}
// Function to perform the Bijective Burrows-Wheeler Transform (BWTS)
void bwts(const char *s, char *result, int maxLen) {
char **conjs = (char **)malloc(maxLen * sizeof(char *));
for (int i = 0; i < maxLen; i++) {
conjs[i] = (char *)malloc(maxLen * sizeof(char));
}
int conjsCount = 0;
lyndonConjugates(s, conjs, &conjsCount, maxLen);
qsort(conjs, conjsCount, sizeof(char *), infLexOrderCompare);
for (int i = 0; i < conjsCount; i++) {
result[i] = conjs[i][strlen(conjs[i]) - 1];
}
result[conjsCount] = '\0';
for (int i = 0; i < maxLen; i++) {
free(conjs[i]);
}
free(conjs);
}
int main() {
char input[] = "86754321sdafghfjghdfgsd";
int maxLen = 256;
char *bwtOutput = (char *)malloc(maxLen * sizeof(char));
char *inverseOutput = (char *)malloc(maxLen * sizeof(char));
bwts(input, bwtOutput, maxLen);
printf("Input: %s\n", input);
printf("Bijective BWT: %s\n", bwtOutput);
free(bwtOutput);
free(inverseOutput);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment