-
-
Save maasha/7b7ed954d52ae391f20c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'inline' | |
# Mixin that adds C helpers for comparing nucleotides, including ambiguity
# codes, to the C prelude of a RubyInline builder.
module Ambiguity
  # Registers the MATCH macro and the lookup table it relies on.
  #
  # inline_builder - any object responding to +prefix(String)+; each call
  #                  appends one chunk of C source to the generated prelude.
  #
  # The method is invoked on whatever +self+ is at the call site. When it is
  # called from a class-level `inline do |builder| ... end` block, the host
  # class therefore has to +extend+ this module rather than +include+ it.
  def add_ambiguity_macro(inline_builder)
    # Macro for matching nucleotides including ambiguity codes: two residues
    # match when their bit sets overlap. The indices are cast to
    # unsigned char because plain `char` may be signed, in which case a byte
    # >= 0x80 would index in front of the table.
    inline_builder.prefix %{
      #define MATCH(A,B) ((bitmap[(unsigned char) (A)] & bitmap[(unsigned char) (B)]) != 0)
    }

    # Bitmap for matching nucleotides including ambiguity codes, indexed by
    # byte value. Bits are counted from the least significant end:
    # value 1 for A, value 2 for T (and U), value 4 for C, value 8 for G.
    # An ambiguity code is the OR of the bases it stands for (e.g. N = 15).
    # Upper-case (rows 64-95) and lower-case (rows 96-127) letters carry the
    # same values; every other byte is 0 and therefore never matches.
    inline_builder.prefix %{
      char bitmap[256] = {
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
          0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
          0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
          0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
      };
    }
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/Users/maasha/biopieces/code_ruby/lib/maasha/seq/levenshtein.rb:51:in `block in <class:Levenshtein>': undefined method `add_ambiguity_macro' for Levenshtein:Class (NoMethodError)
from /usr/local/lib/ruby/gems/1.9.1/gems/RubyInline-3.12.1/lib/inline.rb:844:in `inline'
from /Users/maasha/biopieces/code_ruby/lib/maasha/seq/levenshtein.rb:50:in `<class:Levenshtein>'
from /Users/maasha/biopieces/code_ruby/lib/maasha/seq/levenshtein.rb:31:in `<top (required)>'
from /Users/maasha/biopieces/code_ruby/lib/maasha/seq.rb:394:in `edit_distance'
from ./test.rb:8:in `<main>'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'inline' | |
require 'maasha/seq/ambiguity' | |
# Class to calculate the Levenshtein distance between two
# given strings.
# http://en.wikipedia.org/wiki/Levenshtein_distance
#
# Characters are compared with the MATCH macro contributed by Ambiguity, so
# two residues count as equal when their ambiguity bit sets overlap.
class Levenshtein
  # The `inline` block below is evaluated with the class itself as +self+,
  # so add_ambiguity_macro must be a class-level method. `include` would only
  # define it on instances and raise NoMethodError while the class is loading.
  extend Ambiguity

  # Bytes reserved per score-vector cell.
  # NOTE(review): assumes sizeof(unsigned int) == 4 on the target platform.
  BYTES_IN_INT = 4

  # Returns the edit distance between the strings +s+ and +t+.
  #
  # Identical strings and empty operands are answered in Ruby; everything
  # else is delegated to the C routine distance_C.
  def self.distance(s, t)
    return 0 if s == t
    return t.length if s.empty?
    return s.length if t.empty?

    # Zero-filled Ruby strings act as raw scratch buffers for the two score
    # rows (t.length + 1 cells each); the C code writes into them in place.
    v0 = "\0" * (t.length + 1) * BYTES_IN_INT
    v1 = "\0" * (t.length + 1) * BYTES_IN_INT

    new.distance_C(s, t, s.length, t.length, v0, v1)
  end

  # >>>>>>>>>>>>>>> RubyInline C code <<<<<<<<<<<<<<<

  inline do |builder|
    add_ambiguity_macro(builder)

    # Three-way minimum used by the recurrence below.
    builder.prefix %{
      unsigned int min(unsigned int a, unsigned int b, unsigned int c)
      {
          unsigned int m = a;

          if (m > b) m = b;
          if (m > c) m = c;

          return m;
      }
    }

    # Two-row dynamic programme: v0 holds the previous row, v1 the current
    # one. The strings are read through unsigned char pointers so that bytes
    # >= 0x80 can never become negative indices inside MATCH.
    builder.c %{
      VALUE distance_C(
        VALUE _s,       // string
        VALUE _t,       // string
        VALUE _s_len,   // string length
        VALUE _t_len,   // string length
        VALUE _v0,      // score vector
        VALUE _v1       // score vector
      )
      {
        unsigned char *s     = (unsigned char *) StringValuePtr(_s);
        unsigned char *t     = (unsigned char *) StringValuePtr(_t);
        unsigned int   s_len = FIX2UINT(_s_len);
        unsigned int   t_len = FIX2UINT(_t_len);
        unsigned int  *v0    = (unsigned int *) StringValuePtr(_v0);
        unsigned int  *v1    = (unsigned int *) StringValuePtr(_v1);

        unsigned int i    = 0;
        unsigned int j    = 0;
        unsigned int cost = 0;

        for (i = 0; i < t_len + 1; i++)
          v0[i] = i;

        for (i = 0; i < s_len; i++)
        {
          v1[0] = i + 1;

          for (j = 0; j < t_len; j++)
          {
            cost = (MATCH(s[i], t[j])) ? 0 : 1;
            v1[j + 1] = min(v1[j] + 1, v0[j + 1] + 1, v0[j] + cost);
          }

          for (j = 0; j < t_len + 1; j++)
            v0[j] = v1[j];
        }

        return UINT2NUM(v1[t_len]);
      }
    }
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment