Skip to content

Instantly share code, notes, and snippets.

@markandrus
Last active December 24, 2015 03:29
Show Gist options
  • Save markandrus/6737361 to your computer and use it in GitHub Desktop.
Save markandrus/6737361 to your computer and use it in GitHub Desktop.
#include <assert.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// Table
// -----
#define NONE DBL_MAX
typedef double maybe_float;
struct Table {
size_t rows;
size_t cols;
maybe_float *table;
};
typedef struct Table *Table;
Table new_table(size_t rows, size_t cols) {
Table table = malloc(sizeof(struct Table));
assert(table != NULL);
table->rows = rows;
table->cols = cols;
table->table = malloc(rows * cols * sizeof(maybe_float));
int i;
for (i = 0; i <= rows * cols; i++)
table->table[i] = NONE;
return table;
}
void free_table(Table *table) {
assert(table != NULL);
assert(*table != NULL);
assert((*table)->table != NULL);
free((*table)->table);
free(*table);
*table = NULL;
}
maybe_float get(Table table, size_t x, size_t y) {
assert(table != NULL);
assert(table->table != NULL);
assert(table->rows > x);
assert(table->cols > y);
return table->table[table->cols * x + y];
}
float set(Table table, size_t x, size_t y, float v) {
assert(table != NULL);
assert(table->table != NULL);
assert(table->rows > x);
assert(table->cols > y);
assert(table->table[table->cols * x + y] == NONE);
return (float) (table->table[table->cols * x + y] = v);
}
// Configuration
// -------------
typedef float (*cost_fn)(void *, void *);
struct Config {
size_t size;
void *a;
void *b;
size_t len_a;
size_t len_b;
Table table;
float ins_del;
cost_fn cost;
};
typedef struct Config *Config;
void *elem_of_a(Config config, size_t i) {
return config->a + (--i * config->size);
}
void *elem_of_b(Config config, size_t j) {
return config->b + (--j * config->size);
}
void free_config(Config *config) {
assert(config != NULL);
assert(*config != NULL);
free_table(&(*config)->table);
free(*config);
*config = NULL;
}
// Minimum-Cost Sequence-Alignment
// -------------------------------
#define min3(X, Y, Z) ((X) < (Y) && (X) < (Z) ? X : \
((Y) < (X) && (Y) < (Z) ? Y : Z))
float calc_cost(Config config) {
int i, j;
size_t len_a = config->len_a,
len_b = config->len_b;
float ins_del = config->ins_del;
cost_fn cost = config->cost;
Table table = config->table;
set(table, 0, 0, 0);
for (i=1; i<len_a; i++)
set(table, i, 0, i * ins_del);
for (i=1; i<len_b; i++)
set(table, 0, i, i * ins_del);
for (j=1; j<len_b+1; j++) {
for (i=1; i<len_a+1; i++) {
set(table, i, j, min3(
get(table, i-1, j) + ins_del,
get(table, i, j-1) + ins_del,
get(table, i-1, j-1) + cost(elem_of_a(config, i), elem_of_b(config, j))
));
}
}
return get(table, config->len_a, config->len_b);
}
typedef void (*traverse_fn)(void *, void *);
void traverse_alignment_r(Config config, traverse_fn fn, size_t i, size_t j) {
float ins_del = config->ins_del;
cost_fn cost = config->cost;
Table table = config->table;
if (i == 0 && j == 0) {
return;
} else if (i == 0 && j != 0) {
fn(NULL, elem_of_b(config, j));
traverse_alignment_r(config, fn, 0, j-1);
} else if (i != 0 && j == 0) {
fn(elem_of_a(config, i), NULL);
traverse_alignment_r(config, fn, i-1, 0);
} else {
void *e_a = elem_of_a(config, i),
*e_b = elem_of_b(config, j);
float assoc = get(table, i-1, j-1) + cost(e_a, e_b),
ins_del_l = get(table, i, j-1) + ins_del,
ins_del_r = get(table, i-1, j) + ins_del;
if (assoc <= ins_del_l && assoc <= ins_del_r) {
fn(e_a, e_b);
traverse_alignment_r(config, fn, i-1, j-1);
} else if (ins_del_l <= assoc && ins_del_l <= ins_del_r) {
fn(NULL, e_b);
traverse_alignment_r(config, fn, i, j-1);
} else {
fn(e_a, NULL);
traverse_alignment_r(config, fn, i-1, j);
}
}
}
void traverse_alignment(Config config, traverse_fn fn) {
traverse_alignment_r(config, fn, config->len_a, config->len_b);
}
// String Configuration
// --------------------
bool vowel(char a) {
switch (a) {
case 'a': return true;
case 'e': return true;
case 'i': return true;
case 'o': return true;
case 'u': return true;
default: return false;
}
}
float cost_string(char *as, char *bs) {
char a = as[0],
b = bs[0];
if (a == b)
return 0;
else if (vowel(a) && vowel(b))
return 0.5;
else if (vowel(a) || vowel(b))
return 1.2;
else
return 0.6;
}
void print_string(char *as, char *bs) {
char a = as == NULL ? ' ' : as[0],
b = bs == NULL ? ' ' : bs[0];
fprintf(stderr, "%c %c\n", a, b);
}
Config new_string_config(char *a, char *b) {
Config config = malloc(sizeof(struct Config));
config->size = sizeof(char);
config->a = a;
config->b = b;
config->len_a = strlen(a);
config->len_b = strlen(b);
config->table = new_table(config->len_a + 1, config->len_b + 1);
config->ins_del = 1;
config->cost = (cost_fn) cost_string;
return config;
}
// Fingerprint
// -----------
struct Fingerprint {
size_t length;
int *data;
};
typedef struct Fingerprint *Fingerprint;
Fingerprint unsafe_parse_fingerprint(void *fp) {
char *line = NULL;
size_t i, linecap = 0;
// Allocate the fingerprint.
Fingerprint fingerprint = calloc(1, sizeof(struct Fingerprint));
assert(fingerprint != NULL);
// Parse header.
getline(&line, &linecap, fp);
assert(line != NULL);
sscanf(line, "%zu", &fingerprint->length);
// Allocate room for the finerprint's data.
fingerprint->data = malloc(fingerprint->length * sizeof(int));
assert(fingerprint->data != NULL);
// Read in the fingerprint's data.
for (i=0; i<fingerprint->length; i++) {
getline(&line, &linecap, fp);
assert(line != NULL);
sscanf(line, "%d", fingerprint->data + i);
}
free(line);
return fingerprint;
}
void free_fingerprint(Fingerprint *a) {
assert(a != NULL);
assert(*a != NULL);
assert((*a)->data != NULL);
free((*a)->data);
free(*a);
*a = NULL;
}
// Fingerprint Configuration
// -------------------------
int take_min(Fingerprint a) {
assert(a != NULL);
assert(a->length > 0);
assert(a->data != NULL);
int i, min = a->data[0];
for (i = 1; i < a->length; i++)
if (a->data[i] < min)
min = a->data[i];
return min;
}
int take_max(Fingerprint a) {
assert(a != NULL);
assert(a->length > 0);
assert(a->data != NULL);
int i, max = a->data[0];
for (i = 1; i < a->length; i++)
if (a->data[i] > max)
max = a->data[i];
return max;
}
float calc_ins_del(Fingerprint a, Fingerprint b) {
int min_a = take_min(a),
min_b = take_min(b),
max_a = take_max(a),
max_b = take_max(b),
l, r;
l = abs(max_a - min_b);
r = abs(max_b - min_a);
return l > r ? l : r;
}
float cost_fingerprint(int *as, int *bs) {
int a = as[0],
b = bs[0];
return abs(a - b);
}
void print_fingerprint(int *as, int *bs) {
if (as != NULL)
fprintf(stderr, "%d\t", *as);
else
fprintf(stderr, "\t");
if (bs != NULL)
fprintf(stderr, "%d\n", *bs);
else
fprintf(stderr, "\n");
}
Config new_fingerprint_config(Fingerprint a, Fingerprint b) {
Config config = malloc(sizeof(struct Config));
config->size = sizeof(int);
config->a = a->data;
config->b = b->data;
config->len_a = a->length;
config->len_b = b->length;
config->table = new_table(config->len_a + 1, config->len_b + 1);
config->ins_del = calc_ins_del(a, b);
config->cost = (cost_fn) cost_fingerprint;
return config;
}
// Main
// ----
int unsafe_string_main() {
char *a, *b;
size_t linecap = 0;
getline(&a, &linecap, stdin);
getline(&b, &linecap, stdin);
Config config = new_string_config(a, b);
float cost = calc_cost(config);
fprintf(stderr, "Cost: %f\n", cost);
fprintf(stderr, "\nAlignment:\n");
traverse_alignment(config, (traverse_fn) print_string);
free_config(&config);
free(a);
free(b);
printf("%f\n", cost);
return 0;
}
int unsafe_fingerprint_main() {
Fingerprint a = unsafe_parse_fingerprint(stdin),
b = unsafe_parse_fingerprint(stdin);
Config config = new_fingerprint_config(a, b);
float cost = calc_cost(config);
fprintf(stderr, "Cost: %f\n", cost);
fprintf(stderr, "\nAlignment:\n");
traverse_alignment(config, (traverse_fn) print_fingerprint);
free_config(&config);
free_fingerprint(&a);
free_fingerprint(&b);
printf("%f\n", cost);
return 0;
}
int main (int argc, char **argv) {
// return unsafe_string_main();
return unsafe_fingerprint_main();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment