Last active
December 24, 2015 03:29
-
-
Save markandrus/6737361 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <float.h> | |
#include <math.h> | |
#include <stdbool.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
// Table | |
// ----- | |
#define NONE DBL_MAX | |
typedef double maybe_float; | |
struct Table { | |
size_t rows; | |
size_t cols; | |
maybe_float *table; | |
}; | |
typedef struct Table *Table; | |
Table new_table(size_t rows, size_t cols) { | |
Table table = malloc(sizeof(struct Table)); | |
assert(table != NULL); | |
table->rows = rows; | |
table->cols = cols; | |
table->table = malloc(rows * cols * sizeof(maybe_float)); | |
int i; | |
for (i = 0; i <= rows * cols; i++) | |
table->table[i] = NONE; | |
return table; | |
} | |
void free_table(Table *table) { | |
assert(table != NULL); | |
assert(*table != NULL); | |
assert((*table)->table != NULL); | |
free((*table)->table); | |
free(*table); | |
*table = NULL; | |
} | |
maybe_float get(Table table, size_t x, size_t y) { | |
assert(table != NULL); | |
assert(table->table != NULL); | |
assert(table->rows > x); | |
assert(table->cols > y); | |
return table->table[table->cols * x + y]; | |
} | |
float set(Table table, size_t x, size_t y, float v) { | |
assert(table != NULL); | |
assert(table->table != NULL); | |
assert(table->rows > x); | |
assert(table->cols > y); | |
assert(table->table[table->cols * x + y] == NONE); | |
return (float) (table->table[table->cols * x + y] = v); | |
} | |
// Configuration | |
// ------------- | |
typedef float (*cost_fn)(void *, void *); | |
struct Config { | |
size_t size; | |
void *a; | |
void *b; | |
size_t len_a; | |
size_t len_b; | |
Table table; | |
float ins_del; | |
cost_fn cost; | |
}; | |
typedef struct Config *Config; | |
void *elem_of_a(Config config, size_t i) { | |
return config->a + (--i * config->size); | |
} | |
void *elem_of_b(Config config, size_t j) { | |
return config->b + (--j * config->size); | |
} | |
void free_config(Config *config) { | |
assert(config != NULL); | |
assert(*config != NULL); | |
free_table(&(*config)->table); | |
free(*config); | |
*config = NULL; | |
} | |
// Minimum-Cost Sequence-Alignment | |
// ------------------------------- | |
#define min3(X, Y, Z) ((X) < (Y) && (X) < (Z) ? X : \ | |
((Y) < (X) && (Y) < (Z) ? Y : Z)) | |
float calc_cost(Config config) { | |
int i, j; | |
size_t len_a = config->len_a, | |
len_b = config->len_b; | |
float ins_del = config->ins_del; | |
cost_fn cost = config->cost; | |
Table table = config->table; | |
set(table, 0, 0, 0); | |
for (i=1; i<len_a; i++) | |
set(table, i, 0, i * ins_del); | |
for (i=1; i<len_b; i++) | |
set(table, 0, i, i * ins_del); | |
for (j=1; j<len_b+1; j++) { | |
for (i=1; i<len_a+1; i++) { | |
set(table, i, j, min3( | |
get(table, i-1, j) + ins_del, | |
get(table, i, j-1) + ins_del, | |
get(table, i-1, j-1) + cost(elem_of_a(config, i), elem_of_b(config, j)) | |
)); | |
} | |
} | |
return get(table, config->len_a, config->len_b); | |
} | |
typedef void (*traverse_fn)(void *, void *); | |
void traverse_alignment_r(Config config, traverse_fn fn, size_t i, size_t j) { | |
float ins_del = config->ins_del; | |
cost_fn cost = config->cost; | |
Table table = config->table; | |
if (i == 0 && j == 0) { | |
return; | |
} else if (i == 0 && j != 0) { | |
fn(NULL, elem_of_b(config, j)); | |
traverse_alignment_r(config, fn, 0, j-1); | |
} else if (i != 0 && j == 0) { | |
fn(elem_of_a(config, i), NULL); | |
traverse_alignment_r(config, fn, i-1, 0); | |
} else { | |
void *e_a = elem_of_a(config, i), | |
*e_b = elem_of_b(config, j); | |
float assoc = get(table, i-1, j-1) + cost(e_a, e_b), | |
ins_del_l = get(table, i, j-1) + ins_del, | |
ins_del_r = get(table, i-1, j) + ins_del; | |
if (assoc <= ins_del_l && assoc <= ins_del_r) { | |
fn(e_a, e_b); | |
traverse_alignment_r(config, fn, i-1, j-1); | |
} else if (ins_del_l <= assoc && ins_del_l <= ins_del_r) { | |
fn(NULL, e_b); | |
traverse_alignment_r(config, fn, i, j-1); | |
} else { | |
fn(e_a, NULL); | |
traverse_alignment_r(config, fn, i-1, j); | |
} | |
} | |
} | |
void traverse_alignment(Config config, traverse_fn fn) { | |
traverse_alignment_r(config, fn, config->len_a, config->len_b); | |
} | |
// String Configuration | |
// -------------------- | |
bool vowel(char a) { | |
switch (a) { | |
case 'a': return true; | |
case 'e': return true; | |
case 'i': return true; | |
case 'o': return true; | |
case 'u': return true; | |
default: return false; | |
} | |
} | |
float cost_string(char *as, char *bs) { | |
char a = as[0], | |
b = bs[0]; | |
if (a == b) | |
return 0; | |
else if (vowel(a) && vowel(b)) | |
return 0.5; | |
else if (vowel(a) || vowel(b)) | |
return 1.2; | |
else | |
return 0.6; | |
} | |
void print_string(char *as, char *bs) { | |
char a = as == NULL ? ' ' : as[0], | |
b = bs == NULL ? ' ' : bs[0]; | |
fprintf(stderr, "%c %c\n", a, b); | |
} | |
Config new_string_config(char *a, char *b) { | |
Config config = malloc(sizeof(struct Config)); | |
config->size = sizeof(char); | |
config->a = a; | |
config->b = b; | |
config->len_a = strlen(a); | |
config->len_b = strlen(b); | |
config->table = new_table(config->len_a + 1, config->len_b + 1); | |
config->ins_del = 1; | |
config->cost = (cost_fn) cost_string; | |
return config; | |
} | |
// Fingerprint | |
// ----------- | |
struct Fingerprint { | |
size_t length; | |
int *data; | |
}; | |
typedef struct Fingerprint *Fingerprint; | |
Fingerprint unsafe_parse_fingerprint(void *fp) { | |
char *line = NULL; | |
size_t i, linecap = 0; | |
// Allocate the fingerprint. | |
Fingerprint fingerprint = calloc(1, sizeof(struct Fingerprint)); | |
assert(fingerprint != NULL); | |
// Parse header. | |
getline(&line, &linecap, fp); | |
assert(line != NULL); | |
sscanf(line, "%zu", &fingerprint->length); | |
// Allocate room for the finerprint's data. | |
fingerprint->data = malloc(fingerprint->length * sizeof(int)); | |
assert(fingerprint->data != NULL); | |
// Read in the fingerprint's data. | |
for (i=0; i<fingerprint->length; i++) { | |
getline(&line, &linecap, fp); | |
assert(line != NULL); | |
sscanf(line, "%d", fingerprint->data + i); | |
} | |
free(line); | |
return fingerprint; | |
} | |
void free_fingerprint(Fingerprint *a) { | |
assert(a != NULL); | |
assert(*a != NULL); | |
assert((*a)->data != NULL); | |
free((*a)->data); | |
free(*a); | |
*a = NULL; | |
} | |
// Fingerprint Configuration | |
// ------------------------- | |
int take_min(Fingerprint a) { | |
assert(a != NULL); | |
assert(a->length > 0); | |
assert(a->data != NULL); | |
int i, min = a->data[0]; | |
for (i = 1; i < a->length; i++) | |
if (a->data[i] < min) | |
min = a->data[i]; | |
return min; | |
} | |
int take_max(Fingerprint a) { | |
assert(a != NULL); | |
assert(a->length > 0); | |
assert(a->data != NULL); | |
int i, max = a->data[0]; | |
for (i = 1; i < a->length; i++) | |
if (a->data[i] > max) | |
max = a->data[i]; | |
return max; | |
} | |
float calc_ins_del(Fingerprint a, Fingerprint b) { | |
int min_a = take_min(a), | |
min_b = take_min(b), | |
max_a = take_max(a), | |
max_b = take_max(b), | |
l, r; | |
l = abs(max_a - min_b); | |
r = abs(max_b - min_a); | |
return l > r ? l : r; | |
} | |
float cost_fingerprint(int *as, int *bs) { | |
int a = as[0], | |
b = bs[0]; | |
return abs(a - b); | |
} | |
void print_fingerprint(int *as, int *bs) { | |
if (as != NULL) | |
fprintf(stderr, "%d\t", *as); | |
else | |
fprintf(stderr, "\t"); | |
if (bs != NULL) | |
fprintf(stderr, "%d\n", *bs); | |
else | |
fprintf(stderr, "\n"); | |
} | |
Config new_fingerprint_config(Fingerprint a, Fingerprint b) { | |
Config config = malloc(sizeof(struct Config)); | |
config->size = sizeof(int); | |
config->a = a->data; | |
config->b = b->data; | |
config->len_a = a->length; | |
config->len_b = b->length; | |
config->table = new_table(config->len_a + 1, config->len_b + 1); | |
config->ins_del = calc_ins_del(a, b); | |
config->cost = (cost_fn) cost_fingerprint; | |
return config; | |
} | |
// Main | |
// ---- | |
int unsafe_string_main() { | |
char *a, *b; | |
size_t linecap = 0; | |
getline(&a, &linecap, stdin); | |
getline(&b, &linecap, stdin); | |
Config config = new_string_config(a, b); | |
float cost = calc_cost(config); | |
fprintf(stderr, "Cost: %f\n", cost); | |
fprintf(stderr, "\nAlignment:\n"); | |
traverse_alignment(config, (traverse_fn) print_string); | |
free_config(&config); | |
free(a); | |
free(b); | |
printf("%f\n", cost); | |
return 0; | |
} | |
int unsafe_fingerprint_main() { | |
Fingerprint a = unsafe_parse_fingerprint(stdin), | |
b = unsafe_parse_fingerprint(stdin); | |
Config config = new_fingerprint_config(a, b); | |
float cost = calc_cost(config); | |
fprintf(stderr, "Cost: %f\n", cost); | |
fprintf(stderr, "\nAlignment:\n"); | |
traverse_alignment(config, (traverse_fn) print_fingerprint); | |
free_config(&config); | |
free_fingerprint(&a); | |
free_fingerprint(&b); | |
printf("%f\n", cost); | |
return 0; | |
} | |
int main (int argc, char **argv) { | |
// return unsafe_string_main(); | |
return unsafe_fingerprint_main(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment