Skip to content

Instantly share code, notes, and snippets.

@yagays
Created November 11, 2012 01:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yagays/4053263 to your computer and use it in GitHub Desktop.
Save yagays/4053263 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'pp'
# Splits +text+ into words at the boundaries marked in +segs+.
#
# A "1" at index i of +segs+ closes the current word after text[i]; any
# other character lets the word keep running. Whatever follows the last
# boundary, up to index segs.length of +text+, becomes the final word.
#
# @param text [String] unsegmented character sequence
# @param segs [String] boundary flags, indexed in step with +text+
# @return [Array] the words in their original order
def segment(text, segs)
  pieces = []
  start = 0
  segs.length.times do |idx|
    next unless segs[idx] == "1"

    pieces << text[start..idx]
    start = idx + 1
  end
  pieces << text[start..segs.length]
  pieces
end
# Scores a segmentation; a lower value is what anneal searches for.
#
# The score is the number of words produced by segment plus the length of
# the distinct words joined with single spaces.
#
# @param text [String] unsegmented character sequence
# @param segs [String] boundary flags understood by segment
# @return [Integer] word count plus size of the joined unique-word list
def evaluate(text, segs)
  tokens = segment(text, segs)
  lexicon = tokens.uniq.join(" ")
  tokens.length + lexicon.length
end
# Returns a copy of +segs+ with the flag at +pos+ toggled.
#
# A "0" becomes "1"; anything else becomes "0". The argument itself is left
# untouched, so callers can keep earlier segmentations (for example a
# best-so-far candidate) without them changing underneath, and frozen
# strings are accepted.
#
# @param segs [String] boundary flags
# @param pos [Integer] index of the flag to toggle
# @return [String] a new string differing from +segs+ only at +pos+
def flip(segs, pos)
  toggled = segs.dup
  toggled[pos] = segs[pos] == "0" ? "1" : "0"
  toggled
end
# Returns a copy of +segs+ after toggling +n+ randomly chosen flags.
#
# Positions are drawn with rand(length), whose result is always below
# length, so every index from the first to the last can be picked. The same
# index may be drawn more than once. All work happens on a copy, so the
# caller's string is never altered; an empty +segs+ has nothing to toggle
# and is returned as an unchanged copy.
#
# @param segs [String] boundary flags
# @param n [Integer] number of toggles to apply
# @return [String] the perturbed copy
def flip_n(segs, n)
  flipped = segs.dup
  return flipped if flipped.empty?

  n.times { flipped = flip(flipped, rand(flipped.length)) }
  flipped
end
# Searches for a low-scoring segmentation of +text+ by simulated annealing.
#
# The temperature starts at segs.length and is divided by +cooling_rate+
# after every round until it is 0.5 or less. In each round, +iterations+
# candidates are generated by toggling temperature.round random flags of the
# current segmentation; the candidate with the lowest evaluate score replaces
# the current segmentation if it beats it. After each round the current
# score and words are printed on one line.
#
# +cooling_rate+ should be greater than 1: a value in (0, 1] never lowers
# the temperature, so the loop would not finish.
#
# The +segs+ argument is never modified.
#
# @param text [String] unsegmented character sequence
# @param segs [String] starting boundary flags
# @param iterations [Integer] candidates tried per temperature step
# @param cooling_rate [Numeric] divisor applied to the temperature each round
# @return [String] the best segmentation found
def anneal(text, segs, iterations, cooling_rate)
  temperature = segs.length.to_f
  while temperature > 0.5
    best_segs = segs
    best = evaluate(text, segs)
    flips = temperature.round
    iterations.times do
      # flip_n gets a copy: if a helper edits its argument in place, neither
      # the current state nor the recorded best candidate may change with it.
      guess = flip_n(segs.dup, flips)
      score = evaluate(text, guess)
      best, best_segs = score, guess if score < best
    end
    segs = best_segs
    temperature /= cooling_rate
    puts "#{best} #{segment(text, segs).join(' ')}"
  end
  segs
end
# Demo run, executed only when this file is the program being run.
if $PROGRAM_NAME == __FILE__
  # Sample text with the word boundaries removed.
  sample = "doyouseethekittyseethedoggydoyoulikethekittylikethedoggy"
  # Hand-written boundary strings for the sample; only the first one is
  # used to seed the search below.
  starting_points = [
    "0000000000000001000000000010000000000000000100000000000",
    "0100100100100001001001000010100100010010000100010010000",
    "0000100100000011001000000110000100010000001100010000001",
  ]
  anneal(sample, starting_points.first, 5000, 1.2)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment