Skip to content

Instantly share code, notes, and snippets.

@mattwildig
Created November 9, 2011 22:44
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mattwildig/1353401 to your computer and use it in GitHub Desktop.
Save mattwildig/1353401 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'pp'
require 'inline'
class Scan
def initialize(seq)
@seq = seq
end
inline do |builder|
builder.prefix %{
#define MATCH(A,B) ((equal[A] & equal[B]) != 0)
}
builder.prefix %{
int equal[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
}
# ss is the start of the string, used only for reporting the match endpoints.
builder.c_raw %{
int backtrack(char* ss, char* s, char* p, int mm, int ins, int del)
{
int r = 0;
while (*s && MATCH(*s, *p)) ++s, ++p; // OK to always match longest segment
if (!*p)
return (int)((s - ss) - 1);
else
{
if (mm && *s && *p && (r = backtrack(ss, s + 1, p + 1, mm - 1, ins, del))) return r;
if (ins && *s && (r = backtrack(ss, s + 1, p, mm, ins - 1, del))) return r;
if (del && *p && (r = backtrack(ss, s, p + 1, mm, ins, del - 1))) return r;
}
return 0;
}
}
# Find all occurrences of p starting at any position in s, with at most
# mm mismatches, ins insertions and del deletions.
builder.c %{
int patscan(char* p, int mm, int ins, int del)
{
//char* s = StringValuePtr(rb_iv_get(self, "@seq"));
VALUE seq = rb_iv_get(self, "@seq");
char* s = StringValuePtr(seq);
char* ss;
int end;
for (ss = s; *s; ++s)
{
end = backtrack(ss, s, p, mm, ins, del);
if (end)
return end;
}
}
}
end
end
seq = "tcatcgagtcatcgatcgatcgatcgatcga"
pat = "gtcatcga"
scanner = Scan.new(seq)
puts scanner.patscan(pat, 0, 0, 0)
__END__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment