Created
November 9, 2011 22:44
-
-
Save mattwildig/1353401 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'pp' | |
require 'inline' | |
class Scan | |
def initialize(seq) | |
@seq = seq | |
end | |
inline do |builder| | |
builder.prefix %{ | |
#define MATCH(A,B) ((equal[A] & equal[B]) != 0) | |
} | |
builder.prefix %{ | |
int equal[256] = { | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0, | |
0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0, | |
0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0, | |
0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | |
}; | |
} | |
# ss is the start of the string, used only for reporting the match endpoints. | |
builder.c_raw %{ | |
int backtrack(char* ss, char* s, char* p, int mm, int ins, int del) | |
{ | |
int r = 0; | |
while (*s && MATCH(*s, *p)) ++s, ++p; // OK to always match longest segment | |
if (!*p) | |
return (int)((s - ss) - 1); | |
else | |
{ | |
if (mm && *s && *p && (r = backtrack(ss, s + 1, p + 1, mm - 1, ins, del))) return r; | |
if (ins && *s && (r = backtrack(ss, s + 1, p, mm, ins - 1, del))) return r; | |
if (del && *p && (r = backtrack(ss, s, p + 1, mm, ins, del - 1))) return r; | |
} | |
return 0; | |
} | |
} | |
# Find all occurrences of p starting at any position in s, with at most | |
# mm mismatches, ins insertions and del deletions. | |
builder.c %{ | |
int patscan(char* p, int mm, int ins, int del) | |
{ | |
//char* s = StringValuePtr(rb_iv_get(self, "@seq")); | |
VALUE seq = rb_iv_get(self, "@seq"); | |
char* s = StringValuePtr(seq); | |
char* ss; | |
int end; | |
for (ss = s; *s; ++s) | |
{ | |
end = backtrack(ss, s, p, mm, ins, del); | |
if (end) | |
return end; | |
} | |
} | |
} | |
end | |
end | |
seq = "tcatcgagtcatcgatcgatcgatcgatcga" | |
pat = "gtcatcga" | |
scanner = Scan.new(seq) | |
puts scanner.patscan(pat, 0, 0, 0) | |
__END__ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment