Skip to content

Instantly share code, notes, and snippets.

@betamike
Created August 31, 2010 15:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save betamike/559176 to your computer and use it in GitHub Desktop.
Save betamike/559176 to your computer and use it in GitHub Desktop.
#!/usr/bin/ruby
def compute_suffix_table(pattern)
suffix = []
index = 0
cur = pattern.length - 1
pattern.reverse.each_char do |c|
if index == 0
(1...pattern.length).reverse_each do |i|
if c != pattern[i]
suffix[index] = (pattern.length - 1) - i
break
end
end
suffix[index] = suffix[index] || pattern.length
else
first = cur + 1
last = -1
match = pattern[first..last]
(1...pattern.length).each do |shift|
first -= 1
last -= 1
chunk = pattern[first..last]
if (chunk == match) and (c != pattern[first-1])
suffix[index] = shift
break
end
end
#do additional check for the string matching
#at the beginning of the fragment
if !suffix[index]
match = match[1...match.length]
while match
if match == pattern[0...match.length]
suffix[index] = pattern.length - match.length
break
else
match = match[1...match.length]
end
end
end
suffix[index] = suffix[index] || pattern.length
end
index += 1
cur -= 1
end
return suffix
end
def compute_table2(pattern)
within = {}
distance = 1
pattern[0...(pattern.length-1)].reverse.each_char do |c|
within[c] = within[c] || distance
distance += 1
end
return within
end
pattern = ARGV[-1]
paths = ARGV[0..-2]
text = ""
paths.each do |a|
#concat all file text into one massive string
File.open(a, 'r') do |file|
text << file.read
text << "\n"
end
end
if text.empty?
puts "Nothing to search!"
exit
end
t1 = compute_suffix_table(pattern)
t2 = compute_table2(pattern)
index = pattern.length - 1
last = text.length
until index > last
tindex = index
pindex = pattern.length - 1
while text[tindex] == pattern[pindex]
if pindex == 0
puts "Found Match"
break
end
pindex -= 1
tindex -= 1
end
jump1 = t1[(pattern.length - 1) - pindex]
jump2 = t2[text[index]] || pattern.length
index += [jump1, jump2].max
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment