Skip to content

Instantly share code, notes, and snippets.

@makenowjust
Last active June 22, 2017 23:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save makenowjust/363ae118b266e77e576a828acaad59a0 to your computer and use it in GitHub Desktop.
Save makenowjust/363ae118b266e77e576a828acaad59a0 to your computer and use it in GitHub Desktop.
require "benchmark"
require "colorize"
require "http"
require "spec/dsl"
class String
def byte_index_naive(string : String, offset = 0)
offset += bytesize if offset < 0
return nil if offset < 0
end_pos = bytesize - string.bytesize
offset.upto(end_pos) do |pos|
if (to_unsafe + pos).memcmp(string.to_unsafe, string.bytesize) == 0
return pos
end
end
nil
end
def byte_index_rabin_karp(search : String, offset = 0)
offset += bytesize if offset < 0
return if offset < 0
return bytesize < offset ? nil : offset if search.empty?
# Rabin-Karp algorithm
# https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
search_hash = 0u32
search.each_byte do |b|
search_hash = search_hash * PRIME_RK + b
end
pow = PRIME_RK ** search.bytesize
pointer = head_pointer = to_unsafe + offset
hash_end_pointer = pointer + search.bytesize
end_pointer = to_unsafe + bytesize
hash = 0u32
return if hash_end_pointer > end_pointer
while pointer < hash_end_pointer
hash = hash * PRIME_RK + pointer.value
pointer += 1
end
while true
if hash == search_hash && head_pointer.memcmp(search.to_unsafe, search.bytesize) == 0
return offset
end
return if pointer >= end_pointer
hash = hash * PRIME_RK + pointer.value - pow * head_pointer.value
pointer += 1
head_pointer += 1
offset += 1
end
nil
end
end
unless File.exists?("mems.txt")
print "Downloading mems.txt (The Memoirs of Sherlock Holmes)...".colorize(:red)
File.open("mems.txt", "w") do |f|
HTTP::Client.get("https://sherlock-holm.es/stories/plain-text/mems.txt") do |response|
IO.copy response.body_io, f
end
end
puts " done".colorize(:green)
end
source = File.read("mems.txt")
short_source = <<-SHORT
Crystal is great, i love Crystal.
Lorem dolor sip amet, Crystal is great
SHORT
target = "crystal"
n = source.byte_index(target)
short_n = short_source.byte_index(target)
puts "For long string:"
Benchmark.ips do |x|
x.report("naive") { source.byte_index_naive(target).should eq n }
x.report("rabin_karp") { source.byte_index_rabin_karp(target).should eq n }
end
puts "For short string:"
Benchmark.ips do |x|
x.report("naive") { short_source.byte_index_naive(target).should eq short_n }
x.report("rabin_karp") { short_source.byte_index_rabin_karp(target).should eq short_n }
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment